From 1897bdc4d33167e9036460631d1349e59d841f2d Mon Sep 17 00:00:00 2001
From: Joerg Roedel
Date: Thu, 13 Nov 2014 13:46:09 +1100
Subject: mmu_notifier: add mmu_notifier_invalidate_range()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This notifier closes an important gap in the current mmu_notifier implementation: the existing callbacks are called too early or too late to reliably manage a non-CPU TLB. Specifically, invalidate_range_start() is called when all pages are still mapped and invalidate_range_end() when all pages are unmapped and potentially freed.

This is fine when the users of the mmu_notifiers manage their own SoftTLB, like KVM does. When the TLB is managed in software it is easy to wipe out entries for a given range and to prevent new entries from being established until invalidate_range_end() is called.

But when the user of mmu_notifiers has to manage a hardware TLB it can still wipe out TLB entries in invalidate_range_start(), but it can't make sure that no new TLB entries in the given range are established between invalidate_range_start() and invalidate_range_end().

To avoid silent data corruption, the entries in the non-CPU TLB need to be flushed when the pages are unmapped (at this point in time no _new_ TLB entries can be established in the non-CPU TLB) but not yet freed (as the non-CPU TLB may still have _existing_ entries pointing to the pages about to be freed).

To fix this problem we need to catch the moment when the Linux VMM flushes remote TLBs (a non-CPU TLB is not very different from a remote CPU TLB), as this is the point in time when the pages are unmapped but _not_ yet freed. The mmu_notifier_invalidate_range() function aims to catch that moment.

IOMMU code will be one user of the notifier callback. Currently this is only the AMD IOMMUv2 driver, but its code is about to be generalized and converted to a generic IOMMU-API extension to fit the needs of similar functionality in other IOMMUs as well.

The current attempt in the AMD IOMMUv2 driver to work around the invalidate_range_start/end() shortcoming is to assign an empty page table to the non-CPU TLB between any invalidate_range_start/end calls. With the empty page table assigned, every page-table walk to re-fill the non-CPU TLB causes a page fault that is reported to the IOMMU driver via an interrupt, possibly causing interrupt storms.

The page-fault handler in the AMD IOMMUv2 driver doesn't handle the fault if an invalidate_range_start/end pair is active; it just reports back SUCCESS to the device and lets it re-fault the page. But existing hardware (newer Radeon GPUs) that makes use of this feature doesn't re-fault indefinitely: after a certain number of faults for the same address the device enters a failure state and needs to be reset.

To avoid the GPUs entering a failure state we need to get rid of the empty-page-table workaround and use the mmu_notifier_invalidate_range() function introduced with this patch.
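As an illustration of the intended usage: once the follow-on patches add the invalidate_range() callback to mmu_notifier_ops and wire up the calls in the VMM, a driver that manages a hardware TLB could hook it roughly as sketched below. The device handle my_dev and the flush primitive hw_iotlb_flush_range() are hypothetical placeholders, not existing kernel interfaces:

	#include <linux/mmu_notifier.h>

	/* Hypothetical device handle and IOTLB flush primitive. */
	struct my_device;
	extern struct my_device *my_dev;
	extern void hw_iotlb_flush_range(struct my_device *dev,
					 unsigned long start,
					 unsigned long end);

	/*
	 * Called while the pages are unmapped but not yet freed: no new
	 * walks can re-fill the IOTLB for this range anymore, and stale
	 * entries are gone before the pages can be reused.  Runs under
	 * the ptl spin-lock, so it must not sleep.
	 */
	static void my_invalidate_range(struct mmu_notifier *mn,
					struct mm_struct *mm,
					unsigned long start,
					unsigned long end)
	{
		hw_iotlb_flush_range(my_dev, start, end);
	}

	static const struct mmu_notifier_ops my_mn_ops = {
		.invalidate_range = my_invalidate_range,
	};

	static struct mmu_notifier my_mn = {
		.ops = &my_mn_ops,
	};

	/* Register against a process address space, e.g. at bind time. */
	static int my_bind_mm(struct mm_struct *mm)
	{
		return mmu_notifier_register(&my_mn, mm);
	}

Compared to the empty-page-table workaround, the device never sees spurious faults; the IOTLB is simply flushed at the same point where the CPU TLB is flushed.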
Signed-off-by: Joerg Roedel Reviewed-by: Andrea Arcangeli Reviewed-by: Jérôme Glisse Cc: Peter Zijlstra Cc: Rik van Riel Cc: Hugh Dickins Cc: Mel Gorman Cc: Johannes Weiner Cc: Jay Cornwall Cc: Oded Gabbay Cc: Suravee Suthikulpanit Cc: Jesse Barnes Cc: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Oded Gabbay --- include/linux/mmu_notifier.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 88787bb4b3b9..17907908d1df 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -242,6 +242,11 @@ static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm, __mmu_notifier_invalidate_range_end(mm, start, end); } +static inline void mmu_notifier_invalidate_range(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ +} + static inline void mmu_notifier_mm_init(struct mm_struct *mm) { mm->mmu_notifier_mm = NULL; @@ -342,6 +347,11 @@ static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm, { } +static inline void mmu_notifier_invalidate_range(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ +} + static inline void mmu_notifier_mm_init(struct mm_struct *mm) { } -- cgit v1.2.3 From 34ee645e83b60ae3d5955f70ab9ab9a159136673 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 13 Nov 2014 13:46:09 +1100 Subject: mmu_notifier: call mmu_notifier_invalidate_range() from VMM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add calls to the new mmu_notifier_invalidate_range() function to all places in the VMM that need it. Signed-off-by: Joerg Roedel Reviewed-by: Andrea Arcangeli Reviewed-by: Jérôme Glisse Cc: Peter Zijlstra Cc: Rik van Riel Cc: Hugh Dickins Cc: Mel Gorman Cc: Johannes Weiner Cc: Jay Cornwall Cc: Oded Gabbay Cc: Suravee Suthikulpanit Cc: Jesse Barnes Cc: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Oded Gabbay --- include/linux/mmu_notifier.h | 41 +++++++++++++++++++++++++++++++++++++++++ kernel/events/uprobes.c | 2 +- mm/fremap.c | 2 +- mm/huge_memory.c | 9 +++++---- mm/hugetlb.c | 7 ++++++- mm/ksm.c | 4 ++-- mm/memory.c | 3 ++- mm/migrate.c | 3 ++- mm/rmap.c | 2 +- 9 files changed, 61 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 17907908d1df..966da2b4b803 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -284,6 +284,44 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) __young; \ }) +#define ptep_clear_flush_notify(__vma, __address, __ptep) \ +({ \ + unsigned long ___addr = __address & PAGE_MASK; \ + struct mm_struct *___mm = (__vma)->vm_mm; \ + pte_t ___pte; \ + \ + ___pte = ptep_clear_flush(__vma, __address, __ptep); \ + mmu_notifier_invalidate_range(___mm, ___addr, \ + ___addr + PAGE_SIZE); \ + \ + ___pte; \ +}) + +#define pmdp_clear_flush_notify(__vma, __haddr, __pmd) \ +({ \ + unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \ + struct mm_struct *___mm = (__vma)->vm_mm; \ + pmd_t ___pmd; \ + \ + ___pmd = pmdp_clear_flush(__vma, __haddr, __pmd); \ + mmu_notifier_invalidate_range(___mm, ___haddr, \ + ___haddr + HPAGE_PMD_SIZE); \ + \ + ___pmd; \ +}) + +#define pmdp_get_and_clear_notify(__mm, __haddr, __pmd) \ +({ \ + unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \ + pmd_t ___pmd; \ + \ + ___pmd = pmdp_get_and_clear(__mm, __haddr, __pmd); \ + mmu_notifier_invalidate_range(__mm, 
___haddr, \ + ___haddr + HPAGE_PMD_SIZE); \ + \ + ___pmd; \ +}) + /* * set_pte_at_notify() sets the pte _after_ running the notifier. * This is safe to start by updating the secondary MMUs, because the primary MMU @@ -362,6 +400,9 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) #define ptep_clear_flush_young_notify ptep_clear_flush_young #define pmdp_clear_flush_young_notify pmdp_clear_flush_young +#define ptep_clear_flush_notify ptep_clear_flush +#define pmdp_clear_flush_notify pmdp_clear_flush +#define pmdp_get_and_clear_notify pmdp_get_and_clear #define set_pte_at_notify set_pte_at #endif /* CONFIG_MMU_NOTIFIER */ diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c index 1d0af8a2c646..bc143cf56cab 100644 --- a/kernel/events/uprobes.c +++ b/kernel/events/uprobes.c @@ -193,7 +193,7 @@ static int __replace_page(struct vm_area_struct *vma, unsigned long addr, } flush_cache_page(vma, addr, pte_pfn(*ptep)); - ptep_clear_flush(vma, addr, ptep); + ptep_clear_flush_notify(vma, addr, ptep); set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot)); page_remove_rmap(page); diff --git a/mm/fremap.c b/mm/fremap.c index 72b8fa361433..9129013732d7 100644 --- a/mm/fremap.c +++ b/mm/fremap.c @@ -37,7 +37,7 @@ static void zap_pte(struct mm_struct *mm, struct vm_area_struct *vma, if (pte_present(pte)) { flush_cache_page(vma, addr, pte_pfn(pte)); - pte = ptep_clear_flush(vma, addr, ptep); + pte = ptep_clear_flush_notify(vma, addr, ptep); page = vm_normal_page(vma, addr, pte); if (page) { if (pte_dirty(pte)) diff --git a/mm/huge_memory.c b/mm/huge_memory.c index de984159cf0b..1d89526ed531 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1036,7 +1036,7 @@ static int do_huge_pmd_wp_page_fallback(struct mm_struct *mm, goto out_free_pages; VM_BUG_ON_PAGE(!PageHead(page), page); - pmdp_clear_flush(vma, haddr, pmd); + pmdp_clear_flush_notify(vma, haddr, pmd); /* leave pmd empty until pte is filled */ pgtable = pgtable_trans_huge_withdraw(mm, pmd); @@ -1179,7 +1179,7 @@ alloc: pmd_t entry; entry = mk_huge_pmd(new_page, vma->vm_page_prot); entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma); - pmdp_clear_flush(vma, haddr, pmd); + pmdp_clear_flush_notify(vma, haddr, pmd); page_add_new_anon_rmap(new_page, vma, haddr); mem_cgroup_commit_charge(new_page, memcg, false); lru_cache_add_active_or_unevictable(new_page, vma); @@ -1512,7 +1512,7 @@ int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, pmd_t entry; ret = 1; if (!prot_numa) { - entry = pmdp_get_and_clear(mm, addr, pmd); + entry = pmdp_get_and_clear_notify(mm, addr, pmd); if (pmd_numa(entry)) entry = pmd_mknonnuma(entry); entry = pmd_modify(entry, newprot); @@ -1644,6 +1644,7 @@ static int __split_huge_page_splitting(struct page *page, * serialize against split_huge_page*. 
*/ pmdp_splitting_flush(vma, address, pmd); + ret = 1; spin_unlock(ptl); } @@ -2834,7 +2835,7 @@ static void __split_huge_zero_page_pmd(struct vm_area_struct *vma, pmd_t _pmd; int i; - pmdp_clear_flush(vma, haddr, pmd); + pmdp_clear_flush_notify(vma, haddr, pmd); /* leave pmd empty until pte is filled */ pgtable = pgtable_trans_huge_withdraw(mm, pmd); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 9fd722769927..2e6add04fa1b 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -2598,8 +2598,11 @@ int copy_hugetlb_page_range(struct mm_struct *dst, struct mm_struct *src, } set_huge_pte_at(dst, addr, dst_pte, entry); } else { - if (cow) + if (cow) { huge_ptep_set_wrprotect(src, addr, src_pte); + mmu_notifier_invalidate_range(src, mmun_start, + mmun_end); + } entry = huge_ptep_get(src_pte); ptepage = pte_page(entry); get_page(ptepage); @@ -2899,6 +2902,7 @@ retry_avoidcopy: /* Break COW */ huge_ptep_clear_flush(vma, address, ptep); + mmu_notifier_invalidate_range(mm, mmun_start, mmun_end); set_huge_pte_at(mm, address, ptep, make_huge_pte(vma, new_page, 1)); page_remove_rmap(old_page); @@ -3374,6 +3378,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, * and that page table be reused and filled with junk. */ flush_tlb_range(vma, start, end); + mmu_notifier_invalidate_range(mm, start, end); mutex_unlock(&vma->vm_file->f_mapping->i_mmap_mutex); mmu_notifier_invalidate_range_end(mm, start, end); diff --git a/mm/ksm.c b/mm/ksm.c index 6b2e337bc03c..d247efab5073 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -892,7 +892,7 @@ static int write_protect_page(struct vm_area_struct *vma, struct page *page, * this assure us that no O_DIRECT can happen after the check * or in the middle of the check. */ - entry = ptep_clear_flush(vma, addr, ptep); + entry = ptep_clear_flush_notify(vma, addr, ptep); /* * Check that no O_DIRECT or similar I/O is in progress on the * page @@ -960,7 +960,7 @@ static int replace_page(struct vm_area_struct *vma, struct page *page, page_add_anon_rmap(kpage, vma, addr); flush_cache_page(vma, addr, pte_pfn(*ptep)); - ptep_clear_flush(vma, addr, ptep); + ptep_clear_flush_notify(vma, addr, ptep); set_pte_at_notify(mm, addr, ptep, mk_pte(kpage, vma->vm_page_prot)); page_remove_rmap(page); diff --git a/mm/memory.c b/mm/memory.c index 3e503831e042..655fd3d34bb0 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -238,6 +238,7 @@ static void tlb_flush_mmu_tlbonly(struct mmu_gather *tlb) { tlb->need_flush = 0; tlb_flush(tlb); + mmu_notifier_invalidate_range(tlb->mm, tlb->start, tlb->end); #ifdef CONFIG_HAVE_RCU_TABLE_FREE tlb_table_flush(tlb); #endif @@ -2234,7 +2235,7 @@ gotten: * seen in the presence of one thread doing SMC and another * thread doing COW. 
*/ - ptep_clear_flush(vma, address, page_table); + ptep_clear_flush_notify(vma, address, page_table); page_add_new_anon_rmap(new_page, vma, address); mem_cgroup_commit_charge(new_page, memcg, false); lru_cache_add_active_or_unevictable(new_page, vma); diff --git a/mm/migrate.c b/mm/migrate.c index 01439953abf5..41945cb0ca38 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -1854,7 +1854,7 @@ fail_putback: */ flush_cache_range(vma, mmun_start, mmun_end); page_add_anon_rmap(new_page, vma, mmun_start); - pmdp_clear_flush(vma, mmun_start, pmd); + pmdp_clear_flush_notify(vma, mmun_start, pmd); set_pmd_at(mm, mmun_start, pmd, entry); flush_tlb_range(vma, mmun_start, mmun_end); update_mmu_cache_pmd(vma, address, &entry); @@ -1862,6 +1862,7 @@ fail_putback: if (page_count(page) != 2) { set_pmd_at(mm, mmun_start, pmd, orig_entry); flush_tlb_range(vma, mmun_start, mmun_end); + mmu_notifier_invalidate_range(mm, mmun_start, mmun_end); update_mmu_cache_pmd(vma, address, &entry); page_remove_rmap(new_page); goto fail_putback; diff --git a/mm/rmap.c b/mm/rmap.c index 19886fb2f13a..d3eb1e02d1c6 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1378,7 +1378,7 @@ static int try_to_unmap_cluster(unsigned long cursor, unsigned int *mapcount, /* Nuke the page table entry. */ flush_cache_page(vma, address, pte_pfn(*pte)); - pteval = ptep_clear_flush(vma, address, pte); + pteval = ptep_clear_flush_notify(vma, address, pte); /* If nonlinear, store the file page offset in the pte. */ if (page->index != linear_page_index(vma, address)) { -- cgit v1.2.3 From 0f0a327fa12cd55de5e7f8c05a70ac3d047f405e Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 13 Nov 2014 13:46:09 +1100 Subject: mmu_notifier: add the callback for mmu_notifier_invalidate_range() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that the mmu_notifier_invalidate_range() calls are in place, add the callback to allow subsystems to register against it. Signed-off-by: Joerg Roedel Reviewed-by: Andrea Arcangeli Reviewed-by: Jérôme Glisse Cc: Peter Zijlstra Cc: Rik van Riel Cc: Hugh Dickins Cc: Mel Gorman Cc: Johannes Weiner Cc: Jay Cornwall Cc: Oded Gabbay Cc: Suravee Suthikulpanit Cc: Jesse Barnes Cc: David Woodhouse Signed-off-by: Andrew Morton Signed-off-by: Oded Gabbay --- include/linux/mmu_notifier.h | 37 ++++++++++++++++++++++++++++++++----- mm/mmu_notifier.c | 25 +++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 966da2b4b803..94d19f64cecf 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -98,11 +98,11 @@ struct mmu_notifier_ops { /* * invalidate_range_start() and invalidate_range_end() must be * paired and are called only when the mmap_sem and/or the - * locks protecting the reverse maps are held. The subsystem - * must guarantee that no additional references are taken to - * the pages in the range established between the call to - * invalidate_range_start() and the matching call to - * invalidate_range_end(). + * locks protecting the reverse maps are held. If the subsystem + * can't guarantee that no additional references are taken to + * the pages in the range, it has to implement the + * invalidate_range() notifier to remove any references taken + * after invalidate_range_start(). * * Invalidation of multiple concurrent ranges may be * optionally permitted by the driver. 
Either way the @@ -144,6 +144,29 @@ struct mmu_notifier_ops { void (*invalidate_range_end)(struct mmu_notifier *mn, struct mm_struct *mm, unsigned long start, unsigned long end); + + /* + * invalidate_range() is either called between + * invalidate_range_start() and invalidate_range_end() when the + * VM has to free pages that were unmapped, but before the + * pages are actually freed, or outside of _start()/_end() when + * a (remote) TLB flush is necessary. + * + * If invalidate_range() is used to manage a non-CPU TLB with + * shared page-tables, it is not necessary to implement the + * invalidate_range_start()/end() notifiers, as + * invalidate_range() already catches the points in time when an + * external TLB range needs to be flushed. + * + * The invalidate_range() function is called under the ptl + * spin-lock and not allowed to sleep. + * + * Note that this function might be called with just a sub-range + * of what was passed to invalidate_range_start()/end(), if + * called between those functions. + */ + void (*invalidate_range)(struct mmu_notifier *mn, struct mm_struct *mm, + unsigned long start, unsigned long end); }; /* @@ -190,6 +213,8 @@ extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm, unsigned long start, unsigned long end); extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm, unsigned long start, unsigned long end); +extern void __mmu_notifier_invalidate_range(struct mm_struct *mm, + unsigned long start, unsigned long end); static inline void mmu_notifier_release(struct mm_struct *mm) { @@ -245,6 +270,8 @@ static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm, static inline void mmu_notifier_invalidate_range(struct mm_struct *mm, unsigned long start, unsigned long end) { + if (mm_has_notifiers(mm)) + __mmu_notifier_invalidate_range(mm, start, end); } static inline void mmu_notifier_mm_init(struct mm_struct *mm) diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 2c8da9825fe3..3b9b3d0741b2 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -193,6 +193,16 @@ void __mmu_notifier_invalidate_range_end(struct mm_struct *mm, id = srcu_read_lock(&srcu); hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { + /* + * Call invalidate_range() here too, so that a subsystem that + * already implements invalidate_range() does not also have to + * register an invalidate_range_end() call-back. Usually a + * subsystem registers either invalidate_range_start()/end() or + * invalidate_range(), so this will be no additional overhead + * (besides the pointer check). 
+ */ + if (mn->ops->invalidate_range) + mn->ops->invalidate_range(mn, mm, start, end); if (mn->ops->invalidate_range_end) mn->ops->invalidate_range_end(mn, mm, start, end); } @@ -200,6 +210,21 @@ void __mmu_notifier_invalidate_range_end(struct mm_struct *mm, } EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range_end); +void __mmu_notifier_invalidate_range(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + struct mmu_notifier *mn; + int id; + + id = srcu_read_lock(&srcu); + hlist_for_each_entry_rcu(mn, &mm->mmu_notifier_mm->list, hlist) { + if (mn->ops->invalidate_range) + mn->ops->invalidate_range(mn, mm, start, end); + } + srcu_read_unlock(&srcu, id); +} +EXPORT_SYMBOL_GPL(__mmu_notifier_invalidate_range); + static int do_mmu_notifier_register(struct mmu_notifier *mn, struct mm_struct *mm, int take_mmap_sem) -- cgit v1.2.3 From fd3cbdc0d1b5254a2e8793df58c409b469899a3f Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 10 Aug 2014 08:53:39 +0200 Subject: jump_label: Fix small typos in the documentation Was reading through the documentation of this code and noticed a few typos, missing commas, etc. Cc: Jason Baron Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Borislav Petkov Cc: Andrew Morton Cc: Linus Torvalds Cc: Thomas Gleixner Cc: Mel Gorman Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 784304b222b3..98f923b6a0ea 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -8,28 +8,28 @@ * Copyright (C) 2011-2012 Peter Zijlstra * * Jump labels provide an interface to generate dynamic branches using - * self-modifying code. Assuming toolchain and architecture support the result - * of a "if (static_key_false(&key))" statement is a unconditional branch (which + * self-modifying code. Assuming toolchain and architecture support, the result + * of a "if (static_key_false(&key))" statement is an unconditional branch (which * defaults to false - and the true block is placed out of line). * * However at runtime we can change the branch target using * static_key_slow_{inc,dec}(). These function as a 'reference' count on the key - * object and for as long as there are references all branches referring to + * object, and for as long as there are references all branches referring to * that particular key will point to the (out of line) true block. * - * Since this relies on modifying code the static_key_slow_{inc,dec}() functions + * Since this relies on modifying code, the static_key_slow_{inc,dec}() functions * must be considered absolute slow paths (machine wide synchronization etc.). - * OTOH, since the affected branches are unconditional their runtime overhead + * OTOH, since the affected branches are unconditional, their runtime overhead * will be absolutely minimal, esp. in the default (off) case where the total * effect is a single NOP of appropriate size. The on case will patch in a jump * to the out-of-line block. * - * When the control is directly exposed to userspace it is prudent to delay the + * When the control is directly exposed to userspace, it is prudent to delay the * decrement to avoid high frequency code modifications which can (and do) * cause significant performance degradation. Struct static_key_deferred and * static_key_slow_dec_deferred() provide for this. 
* - * Lacking toolchain and or architecture support, it falls back to a simple + * Lacking toolchain and/or architecture support, jump labels fall back to a simple * conditional branch. * * struct static_key my_key = STATIC_KEY_INIT_TRUE; @@ -43,8 +43,7 @@ * * Not initializing the key (static data is initialized to 0s anyway) is the * same as using STATIC_KEY_INIT_FALSE. - * -*/ + */ #include #include -- cgit v1.2.3 From fadfe7be6e50de7f03913833b33c56cd8fb66bac Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Fri, 1 Aug 2014 14:33:02 +0200 Subject: perf: Add queued work to remove orphaned child events In cases when the owner task exits before the workload and the workload made some forks, all the events stay around until the last workload process exits. That's because each child event holds a parent reference. We want to release all child events once the parent is gone, because at that time there's no process to read them anyway, so they're just eating resources. This removal races with process exit, which removes all events, and with fork, which clones events. To stay clear of those two, add a work queue that removes orphaned child events from a context whenever such an event is detected. Use a delayed work queue (with delay == 1), because we queue this work from the perf scheduler callbacks; a normal work queue tries to wake up the workqueue process, which deadlocks on rq->lock in this place. Also prevent cloning of events from an abandoned parent event. Signed-off-by: Jiri Olsa Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Mark Rutland Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Corey Ashford Cc: Frederic Weisbecker Cc: Mark Rutland Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1406896382-18404-4-git-send-email-jolsa@kernel.org Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 4 +++ kernel/events/core.c | 87 +++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 90 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 707617a8c0f6..ef5b62bdb103 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -52,6 +52,7 @@ struct perf_guest_info_callbacks { #include #include #include +#include #include struct perf_callchain_entry { @@ -507,6 +508,9 @@ struct perf_event_context { int nr_cgroups; /* cgroup evts */ int nr_branch_stack; /* branch_stack evt */ struct rcu_head rcu_head; + + struct delayed_work orphans_remove; + bool orphans_remove_sched; }; /* diff --git a/kernel/events/core.c b/kernel/events/core.c index bbb3ca22f07c..a25460559b4f 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -46,6 +46,8 @@ #include +static struct workqueue_struct *perf_wq; + struct remote_function_call { struct task_struct *p; int (*func)(void *info); @@ -1381,6 +1383,45 @@ out: perf_event__header_size(tmp); } +/* + * A user event whose owner task is gone. + */ +static bool is_orphaned_event(struct perf_event *event) +{ + return event && !is_kernel_event(event) && !event->owner; +} + +/* + * Event has a parent, but the parent's task has finished; the event is + * alive only because its children hold a reference. 
+ */ +static bool is_orphaned_child(struct perf_event *event) +{ + return is_orphaned_event(event->parent); +} + +static void orphans_remove_work(struct work_struct *work); + +static void schedule_orphans_remove(struct perf_event_context *ctx) +{ + if (!ctx->task || ctx->orphans_remove_sched || !perf_wq) + return; + + if (queue_delayed_work(perf_wq, &ctx->orphans_remove, 1)) { + get_ctx(ctx); + ctx->orphans_remove_sched = true; + } +} + +static int __init perf_workqueue_init(void) +{ + perf_wq = create_singlethread_workqueue("perf"); + WARN(!perf_wq, "failed to create perf workqueue\n"); + return perf_wq ? 0 : -1; +} + +core_initcall(perf_workqueue_init); + static inline int event_filter_match(struct perf_event *event) { @@ -1430,6 +1471,9 @@ event_sched_out(struct perf_event *event, if (event->attr.exclusive || !cpuctx->active_oncpu) cpuctx->exclusive = 0; + if (is_orphaned_child(event)) + schedule_orphans_remove(ctx); + perf_pmu_enable(event->pmu); } @@ -1732,6 +1776,9 @@ event_sched_in(struct perf_event *event, if (event->attr.exclusive) cpuctx->exclusive = 1; + if (is_orphaned_child(event)) + schedule_orphans_remove(ctx); + out: perf_pmu_enable(event->pmu); @@ -3074,6 +3121,7 @@ static void __perf_event_init_context(struct perf_event_context *ctx) INIT_LIST_HEAD(&ctx->flexible_groups); INIT_LIST_HEAD(&ctx->event_list); atomic_set(&ctx->refcount, 1); + INIT_DELAYED_WORK(&ctx->orphans_remove, orphans_remove_work); } static struct perf_event_context * @@ -3405,6 +3453,42 @@ static int perf_release(struct inode *inode, struct file *file) return 0; } +/* + * Remove all orphaned events from the context. + */ +static void orphans_remove_work(struct work_struct *work) +{ + struct perf_event_context *ctx; + struct perf_event *event, *tmp; + + ctx = container_of(work, struct perf_event_context, + orphans_remove.work); + + mutex_lock(&ctx->mutex); + list_for_each_entry_safe(event, tmp, &ctx->event_list, event_entry) { + struct perf_event *parent_event = event->parent; + + if (!is_orphaned_child(event)) + continue; + + perf_remove_from_context(event, true); + + mutex_lock(&parent_event->child_mutex); + list_del_init(&event->child_list); + mutex_unlock(&parent_event->child_mutex); + + free_event(event); + put_event(parent_event); + } + + raw_spin_lock_irq(&ctx->lock); + ctx->orphans_remove_sched = false; + raw_spin_unlock_irq(&ctx->lock); + mutex_unlock(&ctx->mutex); + + put_ctx(ctx); +} + u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running) { struct perf_event *child; @@ -7709,7 +7793,8 @@ inherit_event(struct perf_event *parent_event, if (IS_ERR(child_event)) return child_event; - if (!atomic_long_inc_not_zero(&parent_event->refcount)) { + if (is_orphaned_event(parent_event) || + !atomic_long_inc_not_zero(&parent_event->refcount)) { free_event(child_event); return NULL; } -- cgit v1.2.3 From 770eee1fd38c70a009b321f5dbe64358f42511fd Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 11 Aug 2014 21:27:12 +0200 Subject: perf/x86: Fix data source encoding issues for load latency/precise store This patch fixes issues introduced by Andi's previous 'Revamp PEBS' patch series. This patch fixes the following: - precise_store_data_hsw() encodes the mem op type whenever we can - precise_store_data_hsw() sets the default data source correctly - 0 is not a valid init value for the data source; 
define PERF_MEM_NA as the default value instead. This bug was actually introduced by: commit 722e76e60f2775c21b087ff12c5e678cf0ebcaaf Author: Stephane Eranian Date: Thu May 15 17:56:44 2014 +0200 fix Haswell precise store data source encoding Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/1407785233-32193-4-git-send-email-eranian@google.com Cc: Arnaldo Carvalho de Melo Cc: ak@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 11 +++++++---- include/linux/perf_event.h | 9 ++++++++- 2 files changed, 15 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index a9b60f32064f..67919ce0f76a 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -113,9 +113,12 @@ static u64 precise_store_data_hsw(struct perf_event *event, u64 status) union perf_mem_data_src dse; u64 cfg = event->hw.config & INTEL_ARCH_EVENT_MASK; - dse.val = 0; - dse.mem_op = PERF_MEM_OP_NA; - dse.mem_lvl = PERF_MEM_LVL_NA; + dse.val = PERF_MEM_NA; + + if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) + dse.mem_op = PERF_MEM_OP_STORE; + else if (event->hw.flags & PERF_X86_EVENT_PEBS_LD_HSW) + dse.mem_op = PERF_MEM_OP_LOAD; /* * L1 info only valid for following events: @@ -126,7 +129,7 @@ static u64 precise_store_data_hsw(struct perf_event *event, u64 status) * MEM_UOPS_RETIRED.ALL_STORES */ if (cfg != 0x12d0 && cfg != 0x22d0 && cfg != 0x42d0 && cfg != 0x82d0) - return dse.mem_lvl; + return dse.val; if (status & 1) dse.mem_lvl = PERF_MEM_LVL_L1 | PERF_MEM_LVL_HIT; diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ef5b62bdb103..f0a1036b1911 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -608,6 +608,13 @@ struct perf_sample_data { u64 txn; }; +/* default value for data source */ +#define PERF_MEM_NA (PERF_MEM_S(OP, NA) |\ + PERF_MEM_S(LVL, NA) |\ + PERF_MEM_S(SNOOP, NA) |\ + PERF_MEM_S(LOCK, NA) |\ + PERF_MEM_S(TLB, NA)) + static inline void perf_sample_data_init(struct perf_sample_data *data, u64 addr, u64 period) { @@ -620,7 +627,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data, data->regs_user.regs = NULL; data->stack_user_size = 0; data->weight = 0; - data->data_src.val = 0; + data->data_src.val = PERF_MEM_NA; data->txn = 0; } -- cgit v1.2.3 From 2e39465abc4b7856a0ea6fcf4f6b4668bb5db877 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 4 Aug 2014 12:07:15 +0200 Subject: locking: Remove deprecated smp_mb__() barriers It's been a while and there are no in-tree users left, so remove the deprecated barriers. Signed-off-by: Peter Zijlstra Cc: Chen, Gong Cc: Jacob Pan Cc: Joe Perches Cc: John Sullivan Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Srinivas Pandruvada Cc: Theodore Ts'o Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 36 ------------------------------------ include/linux/bitops.h | 20 -------------------- kernel/sched/core.c | 16 ---------------- 3 files changed, 72 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index fef3a809e7cf..5b08a8540ecf 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -3,42 +3,6 @@ #define _LINUX_ATOMIC_H #include -/* - * Provide __deprecated wrappers for the new interface, avoid flag day changes. - * We need the ugly external functions to break header recursion hell. 
- */ -#ifndef smp_mb__before_atomic_inc -static inline void __deprecated smp_mb__before_atomic_inc(void) -{ - extern void __smp_mb__before_atomic(void); - __smp_mb__before_atomic(); -} -#endif - -#ifndef smp_mb__after_atomic_inc -static inline void __deprecated smp_mb__after_atomic_inc(void) -{ - extern void __smp_mb__after_atomic(void); - __smp_mb__after_atomic(); -} -#endif - -#ifndef smp_mb__before_atomic_dec -static inline void __deprecated smp_mb__before_atomic_dec(void) -{ - extern void __smp_mb__before_atomic(void); - __smp_mb__before_atomic(); -} -#endif - -#ifndef smp_mb__after_atomic_dec -static inline void __deprecated smp_mb__after_atomic_dec(void) -{ - extern void __smp_mb__after_atomic(void); - __smp_mb__after_atomic(); -} -#endif - /** * atomic_add_unless - add unless the number is already a given value * @v: pointer of type atomic_t diff --git a/include/linux/bitops.h b/include/linux/bitops.h index cbc5833fb221..be5fd38bd5a0 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -32,26 +32,6 @@ extern unsigned long __sw_hweight64(__u64 w); */ #include -/* - * Provide __deprecated wrappers for the new interface, avoid flag day changes. - * We need the ugly external functions to break header recursion hell. - */ -#ifndef smp_mb__before_clear_bit -static inline void __deprecated smp_mb__before_clear_bit(void) -{ - extern void __smp_mb__before_atomic(void); - __smp_mb__before_atomic(); -} -#endif - -#ifndef smp_mb__after_clear_bit -static inline void __deprecated smp_mb__after_clear_bit(void) -{ - extern void __smp_mb__after_atomic(void); - __smp_mb__after_atomic(); -} -#endif - #define for_each_set_bit(bit, addr, size) \ for ((bit) = find_first_bit((addr), (size)); \ (bit) < (size); \ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1211575a2208..76c518c9b3a7 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -90,22 +90,6 @@ #define CREATE_TRACE_POINTS #include -#ifdef smp_mb__before_atomic -void __smp_mb__before_atomic(void) -{ - smp_mb__before_atomic(); -} -EXPORT_SYMBOL(__smp_mb__before_atomic); -#endif - -#ifdef smp_mb__after_atomic -void __smp_mb__after_atomic(void) -{ - smp_mb__after_atomic(); -} -EXPORT_SYMBOL(__smp_mb__after_atomic); -#endif - void start_bandwidth_timer(struct hrtimer *period_timer, ktime_t period) { unsigned long delta; -- cgit v1.2.3 From 7608a43d8f2e02f8b532f8e11481d7ecf8b5d3f9 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 30 Jul 2014 13:41:54 -0700 Subject: locking/mutexes: Use MUTEX_SPIN_ON_OWNER when appropriate 4badad35 ("locking/mutex: Disable optimistic spinning on some architectures") added an ARCH_SUPPORTS_ATOMIC_RMW flag to disable the mutex optimistic spinning feature on specific archs. Because CONFIG_MUTEX_SPIN_ON_OWNER only depended on DEBUG and SMP, it was OK to keep the condition guarding the ->owner field a bit loose. However, by adding a new variable to the mix we can end up wasting space on the unused field, i.e.: CONFIG_SMP && (!CONFIG_MUTEX_SPIN_ON_OWNER && !CONFIG_DEBUG_MUTEX). Signed-off-by: Davidlohr Bueso Acked-by: Jason Low Signed-off-by: Peter Zijlstra Cc: aswin@hp.com Cc: Davidlohr Bueso Cc: Heiko Carstens Cc: Jason Low Cc: Linus Torvalds Cc: Paul E. 
McKenney Cc: Tim Chen Link: http://lkml.kernel.org/r/1406752916-3341-5-git-send-email-davidlohr@hp.com Signed-off-by: Ingo Molnar --- include/linux/mutex.h | 2 +- kernel/locking/mutex.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mutex.h b/include/linux/mutex.h index 8d5535c58cc2..e4c29418f407 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -52,7 +52,7 @@ struct mutex { atomic_t count; spinlock_t wait_lock; struct list_head wait_list; -#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_SMP) +#if defined(CONFIG_DEBUG_MUTEXES) || defined(CONFIG_MUTEX_SPIN_ON_OWNER) struct task_struct *owner; #endif #ifdef CONFIG_MUTEX_SPIN_ON_OWNER diff --git a/kernel/locking/mutex.h b/kernel/locking/mutex.h index 4115fbf83b12..5cda397607f2 100644 --- a/kernel/locking/mutex.h +++ b/kernel/locking/mutex.h @@ -16,7 +16,7 @@ #define mutex_remove_waiter(lock, waiter, ti) \ __list_del((waiter)->list.prev, (waiter)->list.next) -#ifdef CONFIG_SMP +#ifdef CONFIG_MUTEX_SPIN_ON_OWNER static inline void mutex_set_owner(struct mutex *lock) { lock->owner = current; -- cgit v1.2.3 From 214e0aed639ef40987bf6159fad303171a6de31e Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 30 Jul 2014 13:41:55 -0700 Subject: locking/Documentation: Move locking related docs into Documentation/locking/ Specifically: Documentation/locking/lockdep-design.txt Documentation/locking/lockstat.txt Documentation/locking/mutex-design.txt Documentation/locking/rt-mutex-design.txt Documentation/locking/rt-mutex.txt Documentation/locking/spinlocks.txt Documentation/locking/ww-mutex-design.txt Signed-off-by: Davidlohr Bueso Acked-by: Randy Dunlap Signed-off-by: Peter Zijlstra Cc: jason.low2@hp.com Cc: aswin@hp.com Cc: Alexei Starovoitov Cc: Al Viro Cc: Andrew Morton Cc: Chris Mason Cc: Dan Streetman Cc: David Airlie Cc: Davidlohr Bueso Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Heiko Carstens Cc: Jason Low Cc: Josef Bacik Cc: Kees Cook Cc: Linus Torvalds Cc: Lubomir Rintel Cc: Masanari Iida Cc: Paul E. 
McKenney Cc: Randy Dunlap Cc: Tim Chen Cc: Vineet Gupta Cc: fengguang.wu@intel.com Link: http://lkml.kernel.org/r/1406752916-3341-6-git-send-email-davidlohr@hp.com Signed-off-by: Ingo Molnar --- Documentation/00-INDEX | 2 + Documentation/DocBook/kernel-locking.tmpl | 2 +- Documentation/lockdep-design.txt | 286 ----------- Documentation/locking/lockdep-design.txt | 286 +++++++++++ Documentation/locking/lockstat.txt | 178 +++++++ Documentation/locking/mutex-design.txt | 157 ++++++ Documentation/locking/rt-mutex-design.txt | 781 ++++++++++++++++++++++++++++++ Documentation/locking/rt-mutex.txt | 79 +++ Documentation/locking/spinlocks.txt | 167 +++++++ Documentation/locking/ww-mutex-design.txt | 344 +++++++++++++ Documentation/lockstat.txt | 178 ------- Documentation/mutex-design.txt | 157 ------ Documentation/rt-mutex-design.txt | 781 ------------------------------ Documentation/rt-mutex.txt | 79 --- Documentation/spinlocks.txt | 167 ------- Documentation/ww-mutex-design.txt | 344 ------------- MAINTAINERS | 4 +- drivers/gpu/drm/drm_modeset_lock.c | 2 +- include/linux/lockdep.h | 2 +- include/linux/mutex.h | 2 +- include/linux/rwsem.h | 2 +- kernel/locking/mutex.c | 2 +- kernel/locking/rtmutex.c | 2 +- lib/Kconfig.debug | 4 +- 24 files changed, 2005 insertions(+), 2003 deletions(-) delete mode 100644 Documentation/lockdep-design.txt create mode 100644 Documentation/locking/lockdep-design.txt create mode 100644 Documentation/locking/lockstat.txt create mode 100644 Documentation/locking/mutex-design.txt create mode 100644 Documentation/locking/rt-mutex-design.txt create mode 100644 Documentation/locking/rt-mutex.txt create mode 100644 Documentation/locking/spinlocks.txt create mode 100644 Documentation/locking/ww-mutex-design.txt delete mode 100644 Documentation/lockstat.txt delete mode 100644 Documentation/mutex-design.txt delete mode 100644 Documentation/rt-mutex-design.txt delete mode 100644 Documentation/rt-mutex.txt delete mode 100644 Documentation/spinlocks.txt delete mode 100644 Documentation/ww-mutex-design.txt (limited to 'include/linux') diff --git a/Documentation/00-INDEX b/Documentation/00-INDEX index 27e67a98b7be..1750fcef1ab4 100644 --- a/Documentation/00-INDEX +++ b/Documentation/00-INDEX @@ -287,6 +287,8 @@ local_ops.txt - semantics and behavior of local atomic operations. lockdep-design.txt - documentation on the runtime locking correctness validator. +locking/ + - directory with info about kernel locking primitives lockstat.txt - info on collecting statistics on locks (and contention). lockup-watchdogs.txt diff --git a/Documentation/DocBook/kernel-locking.tmpl b/Documentation/DocBook/kernel-locking.tmpl index e584ee12a1e7..7c9cc4846cb6 100644 --- a/Documentation/DocBook/kernel-locking.tmpl +++ b/Documentation/DocBook/kernel-locking.tmpl @@ -1972,7 +1972,7 @@ machines due to caching. - Documentation/spinlocks.txt: + Documentation/locking/spinlocks.txt: Linus Torvalds' spinlocking tutorial in the kernel sources. diff --git a/Documentation/lockdep-design.txt b/Documentation/lockdep-design.txt deleted file mode 100644 index 5dbc99c04f6e..000000000000 --- a/Documentation/lockdep-design.txt +++ /dev/null @@ -1,286 +0,0 @@ -Runtime locking correctness validator -===================================== - -started by Ingo Molnar -additions by Arjan van de Ven - -Lock-class ----------- - -The basic object the validator operates upon is a 'class' of locks. 
- -A class of locks is a group of locks that are logically the same with -respect to locking rules, even if the locks may have multiple (possibly -tens of thousands of) instantiations. For example a lock in the inode -struct is one class, while each inode has its own instantiation of that -lock class. - -The validator tracks the 'state' of lock-classes, and it tracks -dependencies between different lock-classes. The validator maintains a -rolling proof that the state and the dependencies are correct. - -Unlike an lock instantiation, the lock-class itself never goes away: when -a lock-class is used for the first time after bootup it gets registered, -and all subsequent uses of that lock-class will be attached to this -lock-class. - -State ------ - -The validator tracks lock-class usage history into 4n + 1 separate state bits: - -- 'ever held in STATE context' -- 'ever held as readlock in STATE context' -- 'ever held with STATE enabled' -- 'ever held as readlock with STATE enabled' - -Where STATE can be either one of (kernel/lockdep_states.h) - - hardirq - - softirq - - reclaim_fs - -- 'ever used' [ == !unused ] - -When locking rules are violated, these state bits are presented in the -locking error messages, inside curlies. A contrived example: - - modprobe/2287 is trying to acquire lock: - (&sio_locks[i].lock){-.-...}, at: [] mutex_lock+0x21/0x24 - - but task is already holding lock: - (&sio_locks[i].lock){-.-...}, at: [] mutex_lock+0x21/0x24 - - -The bit position indicates STATE, STATE-read, for each of the states listed -above, and the character displayed in each indicates: - - '.' acquired while irqs disabled and not in irq context - '-' acquired in irq context - '+' acquired with irqs enabled - '?' acquired in irq context with irqs enabled. - -Unused mutexes cannot be part of the cause of an error. - - -Single-lock state rules: ------------------------- - -A softirq-unsafe lock-class is automatically hardirq-unsafe as well. The -following states are exclusive, and only one of them is allowed to be -set for any lock-class: - - <hardirq-safe> and <hardirq-unsafe> - <softirq-safe> and <softirq-unsafe> - -The validator detects and reports lock usage that violate these -single-lock state rules. - -Multi-lock dependency rules: ----------------------------- - -The same lock-class must not be acquired twice, because this could lead -to lock recursion deadlocks. - -Furthermore, two locks may not be taken in different order: - - <L1> -> <L2> - <L2> -> <L1> - -because this could lead to lock inversion deadlocks. (The validator -finds such dependencies in arbitrary complexity, i.e. there can be any -other locking sequence between the acquire-lock operations, the -validator will still track all dependencies between locks.) - -Furthermore, the following usage based lock dependencies are not allowed -between any two lock-classes: - - <hardirq-safe> -> <hardirq-unsafe> - <softirq-safe> -> <softirq-unsafe> - -The first rule comes from the fact the a hardirq-safe lock could be -taken by a hardirq context, interrupting a hardirq-unsafe lock - and -thus could result in a lock inversion deadlock. Likewise, a softirq-safe -lock could be taken by an softirq context, interrupting a softirq-unsafe -lock. - -The above rules are enforced for any locking sequence that occurs in the -kernel: when acquiring a new lock, the validator checks whether there is -any rule violation between the new lock and any of the held locks. - -When a lock-class changes its state, the following aspects of the above -dependency rules are enforced: - -- if a new hardirq-safe lock is discovered, we check whether it - took any hardirq-unsafe lock in the past. 
- -- if a new softirq-safe lock is discovered, we check whether it took - any softirq-unsafe lock in the past. - -- if a new hardirq-unsafe lock is discovered, we check whether any - hardirq-safe lock took it in the past. - -- if a new softirq-unsafe lock is discovered, we check whether any - softirq-safe lock took it in the past. - -(Again, we do these checks too on the basis that an interrupt context -could interrupt _any_ of the irq-unsafe or hardirq-unsafe locks, which -could lead to a lock inversion deadlock - even if that lock scenario did -not trigger in practice yet.) - -Exception: Nested data dependencies leading to nested locking -------------------------------------------------------------- - -There are a few cases where the Linux kernel acquires more than one -instance of the same lock-class. Such cases typically happen when there -is some sort of hierarchy within objects of the same type. In these -cases there is an inherent "natural" ordering between the two objects -(defined by the properties of the hierarchy), and the kernel grabs the -locks in this fixed order on each of the objects. - -An example of such an object hierarchy that results in "nested locking" -is that of a "whole disk" block-dev object and a "partition" block-dev -object; the partition is "part of" the whole device and as long as one -always takes the whole disk lock as a higher lock than the partition -lock, the lock ordering is fully correct. The validator does not -automatically detect this natural ordering, as the locking rule behind -the ordering is not static. - -In order to teach the validator about this correct usage model, new -versions of the various locking primitives were added that allow you to -specify a "nesting level". An example call, for the block device mutex, -looks like this: - -enum bdev_bd_mutex_lock_class -{ - BD_MUTEX_NORMAL, - BD_MUTEX_WHOLE, - BD_MUTEX_PARTITION -}; - - mutex_lock_nested(&bdev->bd_contains->bd_mutex, BD_MUTEX_PARTITION); - -In this case the locking is done on a bdev object that is known to be a -partition. - -The validator treats a lock that is taken in such a nested fashion as a -separate (sub)class for the purposes of validation. - -Note: When changing code to use the _nested() primitives, be careful and -check really thoroughly that the hierarchy is correctly mapped; otherwise -you can get false positives or false negatives. - -Proof of 100% correctness: --------------------------- - -The validator achieves perfect, mathematical 'closure' (proof of locking -correctness) in the sense that for every simple, standalone single-task -locking sequence that occurred at least once during the lifetime of the -kernel, the validator proves it with a 100% certainty that no -combination and timing of these locking sequences can cause any class of -lock related deadlock. [*] - -I.e. complex multi-CPU and multi-task locking scenarios do not have to -occur in practice to prove a deadlock: only the simple 'component' -locking chains have to occur at least once (anytime, in any -task/context) for the validator to be able to prove correctness. (For -example, complex deadlocks that would normally need more than 3 CPUs and -a very unlikely constellation of tasks, irq-contexts and timings to -occur, can be detected on a plain, lightly loaded single-CPU system as -well!) 
- -This radically decreases the complexity of locking related QA of the -kernel: what has to be done during QA is to trigger as many "simple" -single-task locking dependencies in the kernel as possible, at least -once, to prove locking correctness - instead of having to trigger every -possible combination of locking interaction between CPUs, combined with -every possible hardirq and softirq nesting scenario (which is impossible -to do in practice). - -[*] assuming that the validator itself is 100% correct, and no other - part of the system corrupts the state of the validator in any way. - We also assume that all NMI/SMM paths [which could interrupt - even hardirq-disabled codepaths] are correct and do not interfere - with the validator. We also assume that the 64-bit 'chain hash' - value is unique for every lock-chain in the system. Also, lock - recursion must not be higher than 20. - -Performance: ------------- - -The above rules require _massive_ amounts of runtime checking. If we did -that for every lock taken and for every irqs-enable event, it would -render the system practically unusably slow. The complexity of checking -is O(N^2), so even with just a few hundred lock-classes we'd have to do -tens of thousands of checks for every event. - -This problem is solved by checking any given 'locking scenario' (unique -sequence of locks taken after each other) only once. A simple stack of -held locks is maintained, and a lightweight 64-bit hash value is -calculated, which hash is unique for every lock chain. The hash value, -when the chain is validated for the first time, is then put into a hash -table, which hash-table can be checked in a lockfree manner. If the -locking chain occurs again later on, the hash table tells us that we -dont have to validate the chain again. - -Troubleshooting: ----------------- - -The validator tracks a maximum of MAX_LOCKDEP_KEYS number of lock classes. -Exceeding this number will trigger the following lockdep warning: - - (DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS)) - -By default, MAX_LOCKDEP_KEYS is currently set to 8191, and typical -desktop systems have less than 1,000 lock classes, so this warning -normally results from lock-class leakage or failure to properly -initialize locks. These two problems are illustrated below: - -1. Repeated module loading and unloading while running the validator - will result in lock-class leakage. The issue here is that each - load of the module will create a new set of lock classes for - that module's locks, but module unloading does not remove old - classes (see below discussion of reuse of lock classes for why). - Therefore, if that module is loaded and unloaded repeatedly, - the number of lock classes will eventually reach the maximum. - -2. Using structures such as arrays that have large numbers of - locks that are not explicitly initialized. For example, - a hash table with 8192 buckets where each bucket has its own - spinlock_t will consume 8192 lock classes -unless- each spinlock - is explicitly initialized at runtime, for example, using the - run-time spin_lock_init() as opposed to compile-time initializers - such as __SPIN_LOCK_UNLOCKED(). Failure to properly initialize - the per-bucket spinlocks would guarantee lock-class overflow. - In contrast, a loop that called spin_lock_init() on each lock - would place all 8192 locks into a single lock class. - - The moral of this story is that you should always explicitly - initialize your locks. 
- -One might argue that the validator should be modified to allow -lock classes to be reused. However, if you are tempted to make this -argument, first review the code and think through the changes that would -be required, keeping in mind that the lock classes to be removed are -likely to be linked into the lock-dependency graph. This turns out to -be harder to do than to say. - -Of course, if you do run out of lock classes, the next thing to do is -to find the offending lock classes. First, the following command gives -you the number of lock classes currently in use along with the maximum: - - grep "lock-classes" /proc/lockdep_stats - -This command produces the following output on a modest system: - - lock-classes: 748 [max: 8191] - -If the number allocated (748 above) increases continually over time, -then there is likely a leak. The following command can be used to -identify the leaking lock classes: - - grep "BD" /proc/lockdep - -Run the command and save the output, then compare against the output from -a later run of this command to identify the leakers. This same output -can also help you find situations where runtime lock initialization has -been omitted. diff --git a/Documentation/locking/lockdep-design.txt b/Documentation/locking/lockdep-design.txt new file mode 100644 index 000000000000..5dbc99c04f6e --- /dev/null +++ b/Documentation/locking/lockdep-design.txt @@ -0,0 +1,286 @@ +Runtime locking correctness validator +===================================== + +started by Ingo Molnar +additions by Arjan van de Ven + +Lock-class +---------- + +The basic object the validator operates upon is a 'class' of locks. + +A class of locks is a group of locks that are logically the same with +respect to locking rules, even if the locks may have multiple (possibly +tens of thousands of) instantiations. For example a lock in the inode +struct is one class, while each inode has its own instantiation of that +lock class. + +The validator tracks the 'state' of lock-classes, and it tracks +dependencies between different lock-classes. The validator maintains a +rolling proof that the state and the dependencies are correct. + +Unlike an lock instantiation, the lock-class itself never goes away: when +a lock-class is used for the first time after bootup it gets registered, +and all subsequent uses of that lock-class will be attached to this +lock-class. + +State +----- + +The validator tracks lock-class usage history into 4n + 1 separate state bits: + +- 'ever held in STATE context' +- 'ever held as readlock in STATE context' +- 'ever held with STATE enabled' +- 'ever held as readlock with STATE enabled' + +Where STATE can be either one of (kernel/lockdep_states.h) + - hardirq + - softirq + - reclaim_fs + +- 'ever used' [ == !unused ] + +When locking rules are violated, these state bits are presented in the +locking error messages, inside curlies. A contrived example: + + modprobe/2287 is trying to acquire lock: + (&sio_locks[i].lock){-.-...}, at: [] mutex_lock+0x21/0x24 + + but task is already holding lock: + (&sio_locks[i].lock){-.-...}, at: [] mutex_lock+0x21/0x24 + + +The bit position indicates STATE, STATE-read, for each of the states listed +above, and the character displayed in each indicates: + + '.' acquired while irqs disabled and not in irq context + '-' acquired in irq context + '+' acquired with irqs enabled + '?' acquired in irq context with irqs enabled. + +Unused mutexes cannot be part of the cause of an error. 
+ + +Single-lock state rules: +------------------------ + +A softirq-unsafe lock-class is automatically hardirq-unsafe as well. The +following states are exclusive, and only one of them is allowed to be +set for any lock-class: + + <hardirq-safe> and <hardirq-unsafe> + <softirq-safe> and <softirq-unsafe> + +The validator detects and reports lock usage that violate these +single-lock state rules. + +Multi-lock dependency rules: +---------------------------- + +The same lock-class must not be acquired twice, because this could lead +to lock recursion deadlocks. + +Furthermore, two locks may not be taken in different order: + + <L1> -> <L2> + <L2> -> <L1> + +because this could lead to lock inversion deadlocks. (The validator +finds such dependencies in arbitrary complexity, i.e. there can be any +other locking sequence between the acquire-lock operations, the +validator will still track all dependencies between locks.) + +Furthermore, the following usage based lock dependencies are not allowed +between any two lock-classes: + + <hardirq-safe> -> <hardirq-unsafe> + <softirq-safe> -> <softirq-unsafe> + +The first rule comes from the fact the a hardirq-safe lock could be +taken by a hardirq context, interrupting a hardirq-unsafe lock - and +thus could result in a lock inversion deadlock. Likewise, a softirq-safe +lock could be taken by an softirq context, interrupting a softirq-unsafe +lock. + +The above rules are enforced for any locking sequence that occurs in the +kernel: when acquiring a new lock, the validator checks whether there is +any rule violation between the new lock and any of the held locks. + +When a lock-class changes its state, the following aspects of the above +dependency rules are enforced: + +- if a new hardirq-safe lock is discovered, we check whether it + took any hardirq-unsafe lock in the past. + +- if a new softirq-safe lock is discovered, we check whether it took + any softirq-unsafe lock in the past. + +- if a new hardirq-unsafe lock is discovered, we check whether any + hardirq-safe lock took it in the past. + +- if a new softirq-unsafe lock is discovered, we check whether any + softirq-safe lock took it in the past. + +(Again, we do these checks too on the basis that an interrupt context +could interrupt _any_ of the irq-unsafe or hardirq-unsafe locks, which +could lead to a lock inversion deadlock - even if that lock scenario did +not trigger in practice yet.) + +Exception: Nested data dependencies leading to nested locking +------------------------------------------------------------- + +There are a few cases where the Linux kernel acquires more than one +instance of the same lock-class. Such cases typically happen when there +is some sort of hierarchy within objects of the same type. In these +cases there is an inherent "natural" ordering between the two objects +(defined by the properties of the hierarchy), and the kernel grabs the +locks in this fixed order on each of the objects. + +An example of such an object hierarchy that results in "nested locking" +is that of a "whole disk" block-dev object and a "partition" block-dev +object; the partition is "part of" the whole device and as long as one +always takes the whole disk lock as a higher lock than the partition +lock, the lock ordering is fully correct. The validator does not +automatically detect this natural ordering, as the locking rule behind +the ordering is not static. + +In order to teach the validator about this correct usage model, new +versions of the various locking primitives were added that allow you to +specify a "nesting level". 
An example call, for the block device mutex,
+looks like this:
+
+enum bdev_bd_mutex_lock_class
+{
+       BD_MUTEX_NORMAL,
+       BD_MUTEX_WHOLE,
+       BD_MUTEX_PARTITION
+};
+
+  mutex_lock_nested(&bdev->bd_contains->bd_mutex, BD_MUTEX_PARTITION);
+
+In this case the locking is done on a bdev object that is known to be a
+partition.
+
+The validator treats a lock that is taken in such a nested fashion as a
+separate (sub)class for the purposes of validation.
+
+Note: When changing code to use the _nested() primitives, be careful and
+check really thoroughly that the hierarchy is correctly mapped; otherwise
+you can get false positives or false negatives.
+
+Proof of 100% correctness:
+--------------------------
+
+The validator achieves perfect, mathematical 'closure' (proof of locking
+correctness) in the sense that for every simple, standalone single-task
+locking sequence that occurred at least once during the lifetime of the
+kernel, the validator proves with 100% certainty that no
+combination and timing of these locking sequences can cause any class of
+lock-related deadlock. [*]
+
+I.e. complex multi-CPU and multi-task locking scenarios do not have to
+occur in practice to prove a deadlock: only the simple 'component'
+locking chains have to occur at least once (anytime, in any
+task/context) for the validator to be able to prove correctness. (For
+example, complex deadlocks that would normally need more than 3 CPUs and
+a very unlikely constellation of tasks, irq-contexts and timings to
+occur, can be detected on a plain, lightly loaded single-CPU system as
+well!)
+
+This radically decreases the complexity of locking related QA of the
+kernel: what has to be done during QA is to trigger as many "simple"
+single-task locking dependencies in the kernel as possible, at least
+once, to prove locking correctness - instead of having to trigger every
+possible combination of locking interaction between CPUs, combined with
+every possible hardirq and softirq nesting scenario (which is impossible
+to do in practice).
+
+[*] assuming that the validator itself is 100% correct, and no other
+    part of the system corrupts the state of the validator in any way.
+    We also assume that all NMI/SMM paths [which could interrupt
+    even hardirq-disabled codepaths] are correct and do not interfere
+    with the validator. We also assume that the 64-bit 'chain hash'
+    value is unique for every lock-chain in the system. Also, lock
+    recursion must not be higher than 20.
+
+Performance:
+------------
+
+The above rules require _massive_ amounts of runtime checking. If we did
+that for every lock taken and for every irqs-enable event, it would
+render the system practically unusably slow. The complexity of checking
+is O(N^2), so even with just a few hundred lock-classes we'd have to do
+tens of thousands of checks for every event.
+
+This problem is solved by checking any given 'locking scenario' (unique
+sequence of locks taken after each other) only once. A simple stack of
+held locks is maintained, and a lightweight 64-bit hash value is
+calculated; this hash is unique for every lock chain. When a chain is
+validated for the first time, its hash value is put into a hash table,
+which can be checked in a lock-free manner. If the locking chain occurs
+again later on, the hash table tells us that we don't have to validate
+the chain again.
+
+Troubleshooting:
+----------------
+
+The validator tracks a maximum of MAX_LOCKDEP_KEYS lock classes.
+Exceeding this number will trigger the following lockdep warning:
+
+	(DEBUG_LOCKS_WARN_ON(id >= MAX_LOCKDEP_KEYS))
+
+By default, MAX_LOCKDEP_KEYS is currently set to 8191, and typical
+desktop systems have fewer than 1,000 lock classes, so this warning
+normally results from lock-class leakage or failure to properly
+initialize locks. These two problems are illustrated below:
+
+1.	Repeated module loading and unloading while running the validator
+	will result in lock-class leakage. The issue here is that each
+	load of the module will create a new set of lock classes for
+	that module's locks, but module unloading does not remove old
+	classes (see below discussion of reuse of lock classes for why).
+	Therefore, if that module is loaded and unloaded repeatedly,
+	the number of lock classes will eventually reach the maximum.
+
+2.	Using structures such as arrays that have large numbers of
+	locks that are not explicitly initialized. For example,
+	a hash table with 8192 buckets where each bucket has its own
+	spinlock_t will consume 8192 lock classes -unless- each spinlock
+	is explicitly initialized at runtime, for example, using the
+	run-time spin_lock_init() as opposed to compile-time initializers
+	such as __SPIN_LOCK_UNLOCKED(). Failure to properly initialize
+	the per-bucket spinlocks would guarantee lock-class overflow.
+	In contrast, a loop that called spin_lock_init() on each lock
+	would place all 8192 locks into a single lock class.
+
+	The moral of this story is that you should always explicitly
+	initialize your locks.
+
+One might argue that the validator should be modified to allow
+lock classes to be reused. However, if you are tempted to make this
+argument, first review the code and think through the changes that would
+be required, keeping in mind that the lock classes to be removed are
+likely to be linked into the lock-dependency graph. This turns out to
+be harder to do than to say.
+
+Of course, if you do run out of lock classes, the next thing to do is
+to find the offending lock classes. First, the following command gives
+you the number of lock classes currently in use along with the maximum:
+
+	grep "lock-classes" /proc/lockdep_stats
+
+This command produces the following output on a modest system:
+
+	lock-classes:  748 [max: 8191]
+
+If the number allocated (748 above) increases continually over time,
+then there is likely a leak. The following command can be used to
+identify the leaking lock classes:
+
+	grep "BD" /proc/lockdep
+
+Run the command and save the output, then compare against the output from
+a later run of this command to identify the leakers. This same output
+can also help you find situations where runtime lock initialization has
+been omitted.
diff --git a/Documentation/locking/lockstat.txt b/Documentation/locking/lockstat.txt
new file mode 100644
index 000000000000..7428773a1e69
--- /dev/null
+++ b/Documentation/locking/lockstat.txt
@@ -0,0 +1,178 @@
+
+LOCK STATISTICS
+
+- WHAT
+
+As the name suggests, it provides statistics on locks.
+
+- WHY
+
+Because things like lock contention can severely impact performance.
+
+- HOW
+
+Lockdep already has hooks in the lock functions and maps lock instances to
+lock classes. We build on that (see Documentation/locking/lockdep-design.txt).
+The graph below shows the relation between the lock functions and the various
+hooks therein.
+
+        __acquire
+            |
+           lock _____
+            |        \
+            |    __contended
+            |         |
+            |       <wait>
+            | _______/
+            |/
+            |
+       __acquired
+            |
+            .
+          <hold>
+            .
+            |
+       __release
+            |
+         unlock
+
+lock, unlock	- the regular lock functions
+__*		- the hooks
+<>		- states
+
+With these hooks we provide the following statistics:
+
+ con-bounces     - number of lock contentions that involved x-cpu data
+ contentions     - number of lock acquisitions that had to wait
+ wait time min   - shortest (non-0) time we ever had to wait for a lock
+           max   - longest time we ever had to wait for a lock
+           total - total time we spent waiting on this lock
+           avg   - average time spent waiting on this lock
+ acq-bounces     - number of lock acquisitions that involved x-cpu data
+ acquisitions    - number of times we took the lock
+ hold time min   - shortest (non-0) time we ever held the lock
+           max   - longest time we ever held the lock
+           total - total time this lock was held
+           avg   - average time this lock was held
+
+These numbers are gathered per lock class, per read/write state (when
+applicable).
+
+It also tracks 4 contention points per class. A contention point is a call site
+that had to wait on lock acquisition.
+
+ - CONFIGURATION
+
+Lock statistics are enabled via CONFIG_LOCK_STAT.
+
+ - USAGE
+
+Enable collection of statistics:
+
+# echo 1 >/proc/sys/kernel/lock_stat
+
+Disable collection of statistics:
+
+# echo 0 >/proc/sys/kernel/lock_stat
+
+Look at the current lock statistics:
+
+( line numbers not part of actual output, done for clarity in the explanation
+  below )
+
+# less /proc/lock_stat
+
+01 lock_stat version 0.4
+02-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+03                              class name    con-bounces    contentions   waittime-min   waittime-max waittime-total   waittime-avg    acq-bounces   acquisitions   holdtime-min   holdtime-max holdtime-total   holdtime-avg
+04-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
+05
+06                         &mm->mmap_sem-W:            46             84           0.26         939.10       16371.53         194.90          47291        2922365           0.16     2220301.69 17464026916.32        5975.99
+07                         &mm->mmap_sem-R:            37            100           1.31      299502.61      325629.52        3256.30         212344       34316685           0.10        7744.91    95016910.20           2.77
+08                           ---------------
+09                             &mm->mmap_sem              1          [] khugepaged_scan_mm_slot+0x57/0x280
+10                             &mm->mmap_sem             96          [] __do_page_fault+0x1d4/0x510
+11                             &mm->mmap_sem             34          [] vm_mmap_pgoff+0x87/0xd0
+12                             &mm->mmap_sem             17          [] vm_munmap+0x41/0x80
+13                           ---------------
+14                             &mm->mmap_sem              1          [] dup_mmap+0x2a/0x3f0
+15                             &mm->mmap_sem             60          [] SyS_mprotect+0xe9/0x250
+16                             &mm->mmap_sem             41          [] __do_page_fault+0x1d4/0x510
+17                             &mm->mmap_sem             68          [] vm_mmap_pgoff+0x87/0xd0
+18
+19.............................................................................................................................................................................................................................
+20
+21                         unix_table_lock:           110            112           0.21          49.24         163.91           1.46          21094          66312           0.12         624.42       31589.81           0.48
+22                         ---------------
+23                           unix_table_lock             45          [] unix_create1+0x16e/0x1b0
+24                           unix_table_lock             47          [] unix_release_sock+0x31/0x250
+25                           unix_table_lock             15          [] unix_find_other+0x117/0x230
+26                           unix_table_lock              5          [] unix_autobind+0x11f/0x1b0
+27                         ---------------
+28                           unix_table_lock             39          [] unix_release_sock+0x31/0x250
+29                           unix_table_lock             49          [] unix_create1+0x16e/0x1b0
+30                           unix_table_lock             20          [] unix_find_other+0x117/0x230
+31                           unix_table_lock              4          [] unix_autobind+0x11f/0x1b0
+
+
+This excerpt shows the first two lock class statistics. Line 01 shows the
+output version - each time the format changes this will be updated. Lines
+02-04 show the header with column descriptions. Lines 05-18 and 20-31 show
+the actual statistics. These statistics come in two parts; the actual stats
+separated by a short separator (lines 08, 13) from the contention points.
+
+The first lock (05-18) is a read/write lock, and shows two lines above the
+short separator. The contention points don't match the column descriptors,
+they have two: contentions and the [<IP>] symbol of the contending call
+site. The second set of contention points are the points we're contending
+with.
+
+The integer part of the time values is in microseconds (us).
+
+Dealing with nested locks, subclasses may appear:
+
+32...........................................................................................................................................................................................................................
+33
+34                               &rq->lock:         13128          13128           0.43         190.53      103881.26           7.91          97454        3453404           0.00         401.11    13224683.11           3.82
+35                               ---------
+36                                 &rq->lock           645          [] task_rq_lock+0x43/0x75
+37                                 &rq->lock           297          [] try_to_wake_up+0x127/0x25a
+38                                 &rq->lock           360          [] select_task_rq_fair+0x1f0/0x74a
+39                                 &rq->lock           428          [] scheduler_tick+0x46/0x1fb
+40                               ---------
+41                                 &rq->lock            77          [] task_rq_lock+0x43/0x75
+42                                 &rq->lock           174          [] try_to_wake_up+0x127/0x25a
+43                                 &rq->lock          4715          [] double_rq_lock+0x42/0x54
+44                                 &rq->lock           893          [] schedule+0x157/0x7b8
+45
+46...........................................................................................................................................................................................................................
+47
+48                             &rq->lock/1:          1526          11488           0.33         388.73      136294.31          11.86          21461          38404           0.00          37.93      109388.53           2.84
+49                             -----------
+50                               &rq->lock/1         11526          [] double_rq_lock+0x4f/0x54
+51                             -----------
+52                               &rq->lock/1          5645          [] double_rq_lock+0x42/0x54
+53                               &rq->lock/1          1224          [] schedule+0x157/0x7b8
+54                               &rq->lock/1          4336          [] double_rq_lock+0x4f/0x54
+55                               &rq->lock/1           181          [] try_to_wake_up+0x127/0x25a
+
+Line 48 shows statistics for the second subclass (/1) of &rq->lock class
+(subclass starts from 0), since in this case, as line 50 suggests,
+double_rq_lock actually acquires a nested lock of two spinlocks.
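Where does such a subclass come from? Typically from a _nested() annotation
at a call site that legitimately takes two locks of the same class. A hedged
sketch of how a function like double_rq_lock might produce the "/1" entries
above (the struct is illustrative; only the locking calls are real API):

    #include <linux/spinlock.h>

    struct rq {
            raw_spinlock_t lock;
            /* ... per-cpu runqueue data ... */
    };

    /*
     * Sketch: lock two runqueues in address order. The inner acquisition
     * is annotated with subclass SINGLE_DEPTH_NESTING (1), which is what
     * shows up as "&rq->lock/1" in the statistics above.
     */
    static void double_rq_lock_sketch(struct rq *rq1, struct rq *rq2)
    {
            if (rq1 < rq2) {
                    raw_spin_lock(&rq1->lock);
                    raw_spin_lock_nested(&rq2->lock, SINGLE_DEPTH_NESTING);
            } else {
                    raw_spin_lock(&rq2->lock);
                    raw_spin_lock_nested(&rq1->lock, SINGLE_DEPTH_NESTING);
            }
    }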
+
+View the top contending locks:
+
+# grep : /proc/lock_stat | head
+              clockevents_lock:       2926159        2947636           0.15       46882.81  1784540466.34         605.41        3381345        3879161           0.00        2260.97    53178395.68          13.71
+           tick_broadcast_lock:        346460         346717           0.18        2257.43    39364622.71         113.54        3642919        4242696           0.00        2263.79    49173646.60          11.59
+        &mapping->i_mmap_mutex:        203896         203899           3.36      645530.05 31767507988.39      155800.21        3361776        8893984           0.17        2254.15    14110121.02           1.59
+                     &rq->lock:        135014         136909           0.18         606.09      842160.68           6.15        1540728       10436146           0.00         728.72    17606683.41           1.69
+     &(&zone->lru_lock)->rlock:         93000          94934           0.16          59.18      188253.78           1.98        1199912        3809894           0.15         391.40     3559518.81           0.93
+               tasklist_lock-W:         40667          41130           0.23        1189.42      428980.51          10.43         270278         510106           0.16         653.51     3939674.91           7.72
+               tasklist_lock-R:         21298          21305           0.20        1310.05      215511.12          10.12         186204         241258           0.14        1162.33     1179779.23           4.89
+                    rcu_node_1:         47656          49022           0.16         635.41      193616.41           3.95         844888        1865423           0.00         764.26     1656226.96           0.89
+ &(&dentry->d_lockref.lock)->rlock:     39791          40179           0.15        1302.08       88851.96           2.21        2790851       12527025           0.10        1910.75     3379714.27           0.27
+                    rcu_node_0:         29203          30064           0.16         786.55     1555573.00          51.74          88963         244254           0.00         398.87      428872.51           1.76
+
+Clear the statistics:
+
+# echo 0 > /proc/lock_stat
diff --git a/Documentation/locking/mutex-design.txt b/Documentation/locking/mutex-design.txt
new file mode 100644
index 000000000000..ee231ed09ec6
--- /dev/null
+++ b/Documentation/locking/mutex-design.txt
@@ -0,0 +1,157 @@
+Generic Mutex Subsystem
+
+started by Ingo Molnar
+updated by Davidlohr Bueso
+
+What are mutexes?
+-----------------
+
+In the Linux kernel, mutexes refer to a particular locking primitive
+that enforces serialization on shared memory systems, and not only to
+the generic term referring to 'mutual exclusion' found in academia
+or similar theoretical text books. Mutexes are sleeping locks which
+behave similarly to binary semaphores, and were introduced in 2006[1]
+as an alternative to these. This new data structure provided a number
+of advantages, including simpler interfaces, and at that time smaller
+code (see Disadvantages).
+
+[1] http://lwn.net/Articles/164802/
+
+Implementation
+--------------
+
+Mutexes are represented by 'struct mutex', defined in include/linux/mutex.h
+and implemented in kernel/locking/mutex.c. These locks use a three-state
+atomic counter (->count) to represent the different possible
+transitions that can occur during the lifetime of a lock:
+
+	  1: unlocked
+	  0: locked, no waiters
+   negative: locked, with potential waiters
+
+In its most basic form it also includes a wait-queue and a spinlock
+that serializes access to it. CONFIG_SMP systems can also include
+a pointer to the lock's task owner (->owner) as well as a spinner MCS
+lock (->osq), both described below in (ii).
+
+When acquiring a mutex, there are three possible paths that can be
+taken, depending on the state of the lock:
+
+(i) fastpath: tries to atomically acquire the lock by decrementing the
+    counter. If it was already taken by another task it goes to the next
+    possible path. This logic is architecture specific. On x86-64, the
+    locking fastpath is 2 instructions:
+
+    0000000000000e10 <mutex_lock>:
+    e21:   f0 ff 0b                lock decl (%rbx)
+    e24:   79 08                   jns    e2e <mutex_lock+0x1e>
+
+   the unlocking fastpath is equally tight:
+
+    0000000000000bc0 <mutex_unlock>:
+    bc8:   f0 ff 07                lock incl (%rdi)
+    bcb:   7f 0a                   jg     bd7 <mutex_unlock+0x17>
+
+(ii) midpath: aka optimistic spinning, tries to spin for acquisition
+     while the lock owner is running and there are no other tasks ready
+     to run that have higher priority (need_resched).
The rationale is
+     that if the lock owner is running, it is likely to release the lock
+     soon. The mutex spinners are queued up using an MCS lock so that only
+     one spinner can compete for the mutex.
+
+     The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spinlock
+     with the desirable properties of being fair and with each cpu trying
+     to acquire the lock spinning on a local variable. It avoids expensive
+     cacheline bouncing that common test-and-set spinlock implementations
+     incur. An MCS-like lock is specially tailored for optimistic spinning
+     in sleeping-lock implementations. An important feature of the customized
+     MCS lock is that spinners are able to exit the MCS spinlock queue when
+     they need to reschedule. This further helps avoid situations where MCS
+     spinners that need to reschedule would continue waiting to spin on the
+     mutex owner, only to go directly to the slowpath upon obtaining the
+     MCS lock.
+
+
+(iii) slowpath: last resort, if the lock still cannot be acquired,
+      the task is added to the wait-queue and sleeps until woken up by the
+      unlock path. Under normal circumstances it blocks as TASK_UNINTERRUPTIBLE.
+
+While formally kernel mutexes are sleepable locks, it is path (ii) that
+makes them more practically a hybrid type. By simply not interrupting a
+task and busy-waiting for a few cycles instead of immediately sleeping,
+the performance of this lock has been seen to significantly improve a
+number of workloads. Note that this technique is also used for rw-semaphores.
+
+Semantics
+---------
+
+The mutex subsystem checks and enforces the following rules:
+
+    - Only one task can hold the mutex at a time.
+    - Only the owner can unlock the mutex.
+    - Multiple unlocks are not permitted.
+    - Recursive locking/unlocking is not permitted.
+    - A mutex must only be initialized via the API (see below).
+    - A task may not exit with a mutex held.
+    - Memory areas where held locks reside must not be freed.
+    - Held mutexes must not be reinitialized.
+    - Mutexes may not be used in hardware or software interrupt
+      contexts such as tasklets and timers.
+
+These semantics are fully enforced when CONFIG_DEBUG_MUTEXES is enabled.
+In addition, the mutex debugging code also implements a number of other
+features that make lock debugging easier and faster:
+
+    - Uses symbolic names of mutexes, whenever they are printed
+      in debug output.
+    - Point-of-acquire tracking, symbolic lookup of function names,
+      list of all locks held in the system, printout of them.
+    - Owner tracking.
+    - Detects self-recursing locks and prints out all relevant info.
+    - Detects multi-task circular deadlocks and prints out all affected
+      locks and tasks (and only those tasks).
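As a quick illustration ahead of the interface list that follows, here is a
minimal usage sketch; the device name and function are hypothetical, only the
mutex API calls are real:

    #include <linux/mutex.h>

    static DEFINE_MUTEX(my_dev_mutex);      /* statically defined mutex */

    static int my_dev_update(void)
    {
            int ret;

            /* Sleeps until acquired; fails with -EINTR if a signal arrives. */
            ret = mutex_lock_interruptible(&my_dev_mutex);
            if (ret)
                    return ret;

            /* ... critical section: only one task can be here at a time ... */

            mutex_unlock(&my_dev_mutex);    /* only the owner may do this */
            return 0;
    }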
+
+
+Interfaces
+----------
+Statically define the mutex:
+   DEFINE_MUTEX(name);
+
+Dynamically initialize the mutex:
+   mutex_init(mutex);
+
+Acquire the mutex, uninterruptible:
+   void mutex_lock(struct mutex *lock);
+   void mutex_lock_nested(struct mutex *lock, unsigned int subclass);
+   int  mutex_trylock(struct mutex *lock);
+
+Acquire the mutex, interruptible:
+   int mutex_lock_interruptible_nested(struct mutex *lock,
+                                       unsigned int subclass);
+   int mutex_lock_interruptible(struct mutex *lock);
+
+Acquire the mutex, interruptible, if dec to 0:
+   int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock);
+
+Unlock the mutex:
+   void mutex_unlock(struct mutex *lock);
+
+Test if the mutex is taken:
+   int mutex_is_locked(struct mutex *lock);
+
+Disadvantages
+-------------
+
+Unlike its original design and purpose, 'struct mutex' is larger than
+most locks in the kernel. E.g., on x86-64 it is 40 bytes, almost twice
+as large as 'struct semaphore' (24 bytes) and 8 bytes shy of the
+'struct rw_semaphore' variant. Larger structure sizes mean more CPU
+cache and memory footprint.
+
+When to use mutexes
+-------------------
+
+Unless the strict semantics of mutexes are unsuitable and/or the critical
+region prevents the lock from being shared, always prefer them to any other
+locking primitive.
diff --git a/Documentation/locking/rt-mutex-design.txt b/Documentation/locking/rt-mutex-design.txt
new file mode 100644
index 000000000000..8666070d3189
--- /dev/null
+++ b/Documentation/locking/rt-mutex-design.txt
@@ -0,0 +1,781 @@
+#
+# Copyright (c) 2006 Steven Rostedt
+# Licensed under the GNU Free Documentation License, Version 1.2
+#
+
+RT-mutex implementation design
+------------------------------
+
+This document tries to describe the design of the rtmutex.c implementation.
+It doesn't describe the reasons why rtmutex.c exists. For that please see
+Documentation/rt-mutex.txt. This document does explain problems that arise
+without this code, but only as background for understanding what the code
+actually does.
+
+The goal of this document is to help others understand the priority
+inheritance (PI) algorithm that is used, as well as reasons for the
+decisions that were made to implement PI in the manner that was done.
+
+
+Unbounded Priority Inversion
+----------------------------
+
+Priority inversion is when a lower priority process executes while a higher
+priority process wants to run. This happens for several reasons, and
+most of the time it can't be helped. Anytime a high priority process wants
+to use a resource that a lower priority process has (a mutex for example),
+the high priority process must wait until the lower priority process is done
+with the resource. This is a priority inversion. What we want to prevent
+is something called unbounded priority inversion. That is when the high
+priority process is prevented from running by a lower priority process for
+an undetermined amount of time.
+
+The classic example of unbounded priority inversion is where you have three
+processes, let's call them processes A, B, and C, where A is the highest
+priority process, C is the lowest, and B is in between. A tries to grab a lock
+that C owns, so A must wait, letting C run until it releases the lock. But in
+the meantime, B executes, and since B is of a higher priority than C, it
+preempts C, but by doing so, it is in fact preempting A which is a higher
+priority process.
+Now there's no way of knowing how long A will be sleeping waiting for C
+to release the lock, because for all we know, B is a CPU hog and will
+never give C a chance to release the lock. This is called unbounded priority
+inversion.
+
+Here's a little ASCII art to show the problem.
+
+   grab lock L1 (owned by C)
+     |
+A ---+
+       C preempted by B
+         |
+C    +----+
+
+B         +-------->
+               B now keeps A from running.
+
+
+Priority Inheritance (PI)
+-------------------------
+
+There are several ways to solve this issue, but other ways are out of scope
+for this document. Here we only discuss PI.
+
+PI is where a process inherits the priority of another process if the other
+process blocks on a lock owned by the current process. To make this easier
+to understand, let's use the previous example, with processes A, B, and C again.
+
+This time, when A blocks on the lock owned by C, C would inherit the priority
+of A. So now if B becomes runnable, it would not preempt C, since C now has
+the high priority of A. As soon as C releases the lock, it loses its
+inherited priority, and A then can continue with the resource that C had.
+
+Terminology
+-----------
+
+Here I explain some terminology that is used in this document to help describe
+the design that is used to implement PI.
+
+PI chain - The PI chain is an ordered series of locks and processes that cause
+           processes to inherit priorities from a previous process that is
+           blocked on one of its locks. This is described in more detail
+           later in this document.
+
+mutex    - In this document, to differentiate the locks that implement PI
+           from the spin locks that are used in the PI code, the PI locks
+           will from now on be called mutexes.
+
+lock     - In this document from now on, I will use the term lock when
+           referring to spin locks that are used to protect parts of the PI
+           algorithm. These locks disable preemption for UP (when
+           CONFIG_PREEMPT is enabled) and on SMP prevent multiple CPUs from
+           entering critical sections simultaneously.
+
+spin lock - Same as lock above.
+
+waiter   - A waiter is a struct that is stored on the stack of a blocked
+           process. Since the scope of the waiter is within the code for
+           a process being blocked on the mutex, it is fine to allocate
+           the waiter on the process's stack (local variable). This
+           structure holds a pointer to the task, as well as the mutex that
+           the task is blocked on. It also has the plist node structures to
+           place the task in the waiter_list of a mutex as well as the
+           pi_list of a mutex owner task (described below).
+
+           waiter is sometimes used in reference to the task that is waiting
+           on a mutex. This is the same as waiter->task.
+
+waiters  - A list of processes that are blocked on a mutex.
+
+top waiter - The highest priority process waiting on a specific mutex.
+
+top pi waiter - The highest priority process waiting on one of the mutexes
+                that a specific process owns.
+
+Note: task and process are used interchangeably in this document, mostly to
+      differentiate between two processes that are being described together.
+
+
+PI chain
+--------
+
+The PI chain is a list of processes and mutexes that may cause priority
+inheritance to take place. Multiple chains may converge, but a chain
+would never diverge, since a process can't be blocked on more than one
+mutex at a time.
+
+Example:
+
+   Process:  A, B, C, D, E
+   Mutexes:  L1, L2, L3, L4
+
+   A owns: L1
+           B blocked on L1
+   B owns L2
+           C blocked on L2
+   C owns L3
+           D blocked on L3
+   D owns L4
+           E blocked on L4
+
+The chain would be:
+
+   E->L4->D->L3->C->L2->B->L1->A
+
+To show where two chains merge, we could add another process F and
+another mutex L5 where B owns L5 and F is blocked on mutex L5.
+
+The chain for F would be:
+
+   F->L5->B->L1->A
+
+Since a process may own more than one mutex, but never be blocked on more than
+one, the chains merge.
+
+Here we show both chains:
+
+   E->L4->D->L3->C->L2-+
+                       |
+                       +->B->L1->A
+                       |
+                 F->L5-+
+
+For PI to work, the processes at the right end of these chains (or we may
+also call it the Top of the chain) must be equal to or higher in priority
+than the processes to the left or below in the chain.
+
+Also since a mutex may have more than one process blocked on it, we can
+have multiple chains merge at mutexes. If we add another process G that is
+blocked on mutex L2:
+
+   G->L2->B->L1->A
+
+And once again, to show how this can grow I will show the merging chains
+again.
+
+   E->L4->D->L3->C-+
+                   +->L2-+
+                   |     |
+                 G-+     +->B->L1->A
+                         |
+                   F->L5-+
+
+
+Plist
+-----
+
+Before I go further and talk about how the PI chain is stored through lists
+on both mutexes and processes, I'll explain the plist. This is similar to
+the struct list_head functionality that is already in the kernel.
+The implementation of plist is out of scope for this document, but it is
+very important to understand what it does.
+
+There are a few differences between plist and list, the most important one
+being that plist is a priority sorted linked list. This means that the
+priorities of the plist are sorted, such that it takes O(1) to retrieve the
+highest priority item in the list. Obviously this is useful to store processes
+based on their priorities.
+
+Another difference, which is important for implementation, is that, unlike
+list, the head of the list is a different element than the nodes of a list.
+So the head of the list is declared as struct plist_head and nodes that will
+be added to the list are declared as struct plist_node.
+
+
+Mutex Waiter List
+-----------------
+
+Every mutex keeps track of all the waiters that are blocked on itself. The mutex
+has a plist to store these waiters by priority. This list is protected by
+a spin lock that is located in the struct of the mutex. This lock is called
+wait_lock. Since the modification of the waiter list is never done in
+interrupt context, the wait_lock can be taken without disabling interrupts.
+
+
+Task PI List
+------------
+
+To keep track of the PI chains, each process has its own PI list. This is
+a list of all top waiters of the mutexes that are owned by the process.
+Note that this list only holds the top waiters and not all waiters that are
+blocked on mutexes owned by the process.
+
+The top of the task's PI list is always the highest priority task that
+is waiting on a mutex that is owned by the task. So if the task has
+inherited a priority, it will always be the priority of the task that is
+at the top of this list.
+
+This list is stored in the task structure of a process as a plist called
+pi_list. This list is protected by a spin lock also in the task structure,
+called pi_lock. This lock may also be taken in interrupt context, so when
+locking the pi_lock, interrupts must be disabled.
+
+
+Depth of the PI Chain
+---------------------
+
+The maximum depth of the PI chain is not dynamic, and could actually be
+defined.
But it is very complex to figure out, since it depends on all
+the nesting of mutexes. Let's look at the example where we have 3 mutexes,
+L1, L2, and L3, and four separate functions func1, func2, func3 and func4.
+The following shows a locking order of L1->L2->L3, but may not actually
+be directly nested that way.
+
+void func1(void)
+{
+	mutex_lock(L1);
+
+	/* do anything */
+
+	mutex_unlock(L1);
+}
+
+void func2(void)
+{
+	mutex_lock(L1);
+	mutex_lock(L2);
+
+	/* do something */
+
+	mutex_unlock(L2);
+	mutex_unlock(L1);
+}
+
+void func3(void)
+{
+	mutex_lock(L2);
+	mutex_lock(L3);
+
+	/* do something else */
+
+	mutex_unlock(L3);
+	mutex_unlock(L2);
+}
+
+void func4(void)
+{
+	mutex_lock(L3);
+
+	/* do something again */
+
+	mutex_unlock(L3);
+}
+
+Now we add 4 processes that run each of these functions separately.
+Processes A, B, C, and D, which run functions func1, func2, func3 and func4
+respectively, such that D runs first and A last. With D being preempted
+in func4 in the "do something again" area, we end up with the following
+locking:
+
+D owns L3
+        C blocked on L3
+C owns L2
+        B blocked on L2
+B owns L1
+        A blocked on L1
+
+And thus we have the chain A->L1->B->L2->C->L3->D.
+
+This gives us a PI depth of 4 (four processes), but looking at any of the
+functions individually, it seems as though they only have at most a locking
+depth of two. So, although the locking depth is defined at compile time,
+it still is very difficult to find the possibilities of that depth.
+
+Now since mutexes can be defined by user-land applications, we don't want a DoS
+type of application that nests large amounts of mutexes to create a large
+PI chain, and have the code holding spin locks while looking at a large
+amount of data. So to prevent this, the implementation not only implements
+a maximum lock depth, but also only holds at most two different locks at a
+time, as it walks the PI chain. More about this below.
+
+
+Mutex owner and flags
+---------------------
+
+The mutex structure contains a pointer to the owner of the mutex. If the
+mutex is not owned, this owner is set to NULL. Since all architectures
+have the task structure on at least a four-byte alignment (and if this is
+not true, the rtmutex.c code will be broken!), this allows for the two
+least significant bits to be used as flags. This part is also described
+in Documentation/rt-mutex.txt, but will also be briefly described here.
+
+Bit 0 is used as the "Pending Owner" flag. This is described later.
+Bit 1 is used as the "Has Waiters" flag. This is also described later
+  in more detail, but is set whenever there are waiters on a mutex.
+
+
+cmpxchg Tricks
+--------------
+
+Some architectures implement an atomic cmpxchg (Compare and Exchange). This
+is used (when applicable) to keep the fast path of grabbing and releasing
+mutexes short.
+
+cmpxchg is basically the following function performed atomically:
+
+unsigned long _cmpxchg(unsigned long *A, unsigned long *B, unsigned long *C)
+{
+	unsigned long T = *A;
+
+	if (*A == *B)
+		*A = *C;
+
+	return T;
+}
+#define cmpxchg(a,b,c) _cmpxchg(&a,&b,&c)
+
+This is really nice to have, since it allows you to only update a variable
+if the variable is what you expect it to be. You know it succeeded if
+the return value (the old value of A) is equal to B.
+
+The macro rt_mutex_cmpxchg is used to try to lock and unlock mutexes. If
+the architecture does not support CMPXCHG, then this macro is simply set
+to fail every time.
But if CMPXCHG is supported, it helps
+greatly in keeping the fast path short.
+
+The use of rt_mutex_cmpxchg with the flags in the owner field helps optimize
+the system for architectures that support it. This will also be explained
+later in this document.
+
+
+Priority adjustments
+--------------------
+
+The implementation of the PI code in rtmutex.c has several places where a
+process must adjust its priority. With the help of the pi_list of a
+process, it is rather easy to know what needs to be adjusted.
+
+The functions implementing the task adjustments are rt_mutex_adjust_prio,
+__rt_mutex_adjust_prio (same as the former, but expects the task pi_lock
+to already be taken), rt_mutex_getprio, and rt_mutex_setprio.
+
+rt_mutex_getprio and rt_mutex_setprio are only used in __rt_mutex_adjust_prio.
+
+rt_mutex_getprio returns the priority that the task should have. Either the
+task's own normal priority, or if a process of a higher priority is waiting on
+a mutex owned by the task, then that higher priority should be returned.
+Since the pi_list of a task holds a priority-ordered list of all the top
+waiters of all the mutexes that the task owns, rt_mutex_getprio simply needs
+to compare the top pi waiter to its own normal priority, and return the higher
+priority back.
+
+(Note: if looking at the code, you will notice that the lower number of
+ prio is returned. This is because the prio field in the task structure
+ is an inverse order of the actual priority. So a "prio" of 5 is
+ of higher priority than a "prio" of 10.)
+
+__rt_mutex_adjust_prio examines the result of rt_mutex_getprio, and if the
+result does not equal the task's current priority, then rt_mutex_setprio
+is called to adjust the priority of the task to the new priority.
+Note that rt_mutex_setprio is defined in kernel/sched/core.c to implement the
+actual change in priority.
+
+It is interesting to note that __rt_mutex_adjust_prio can either increase
+or decrease the priority of the task. In the case that a higher priority
+process has just blocked on a mutex owned by the task, __rt_mutex_adjust_prio
+would increase/boost the task's priority. But if a higher priority task
+were for some reason to leave the mutex (timeout or signal), this same function
+would decrease/unboost the priority of the task. That is because the pi_list
+always contains the highest priority task that is waiting on a mutex owned
+by the task, so we only need to compare the priority of that top pi waiter
+to the normal priority of the given task.
+
+
+High level overview of the PI chain walk
+----------------------------------------
+
+The PI chain walk is implemented by the function rt_mutex_adjust_prio_chain.
+
+The implementation has gone through several iterations, and has ended up
+with what we believe is the best. It walks the PI chain by only grabbing
+at most two locks at a time, and is very efficient.
+
+The rt_mutex_adjust_prio_chain can be used either to boost or lower process
+priorities.
+
+rt_mutex_adjust_prio_chain is called with a task to be checked for PI
+(de)boosting (the owner of a mutex that a process is blocking on), a flag to
+check for deadlocking, the mutex that the task owns, and a pointer to a waiter
+that is the process's waiter struct that is blocked on the mutex (although this
+parameter may be NULL for deboosting).
+
+For this explanation, I will not mention deadlock detection. This explanation
+will try to stay at a high level.
+
+When this function is called, there are no locks held.
That also means
+that the state of the owner and lock can change while this function is
+executing.
+
+Before this function is called, the task has already had rt_mutex_adjust_prio
+performed on it. This means that the task is set to the priority that it
+should be at, but the plist nodes of the task's waiter have not been updated
+with the new priorities, and this task may not be in the proper locations
+in the pi_lists and wait_lists that the task is blocked on. This function
+solves all that.
+
+A loop is entered, where task is the owner to be checked for PI changes that
+was passed in as a parameter (for the first iteration). The pi_lock of this
+task is taken to prevent any more changes to the pi_list of the task. This
+also prevents new tasks from completing the blocking on a mutex that is owned
+by this task.
+
+If the task is not blocked on a mutex then the loop is exited. We are at
+the top of the PI chain.
+
+A check is now done to see if the original waiter (the process that is blocked
+on the current mutex) is the top pi waiter of the task. That is, is this
+waiter on the top of the task's pi_list? If it is not, it either means that
+there is another process higher in priority that is blocked on one of the
+mutexes that the task owns, or that the waiter has just woken up via a signal
+or timeout and has left the PI chain. In either case, the loop is exited, since
+we don't need to do any more changes to the priority of the current task, or any
+task that owns a mutex that this current task is waiting on. A priority chain
+walk is only needed when a new top pi waiter is made to a task.
+
+The next check sees if the task's waiter plist node has the priority equal to
+the priority the task is set at. If they are equal, then we are done with
+the loop. Remember that the function started with the priority of the
+task adjusted, but the plist nodes that hold the task in other processes'
+pi_lists have not been adjusted.
+
+Next, we look at the mutex that the task is blocked on. The mutex's wait_lock
+is taken. This is done by a spin_trylock, because the locking order of the
+pi_lock and wait_lock goes in the opposite direction. If we fail to grab the
+lock, the pi_lock is released, and we restart the loop.
+
+Now that we have both the pi_lock of the task as well as the wait_lock of
+the mutex the task is blocked on, we update the task's waiter's plist node
+that is located on the mutex's wait_list.
+
+Now we release the pi_lock of the task.
+
+Next the owner of the mutex has its pi_lock taken, so we can update the
+task's entry in the owner's pi_list. If the task is the highest priority
+process on the mutex's wait_list, then we remove the previous top waiter
+from the owner's pi_list, and replace it with the task.
+
+Note: It is possible that the task was the current top waiter on the mutex,
+      in which case the task is not yet on the pi_list of the waiter. This
+      is OK, since plist_del does nothing if the plist node is not on any
+      list.
+
+If the task was not the top waiter of the mutex, but it was before we
+did the priority updates, that means we are deboosting/lowering the
+task. In this case, the task is removed from the pi_list of the owner,
+and the new top waiter is added.
+
+Lastly, we unlock both the pi_lock of the task, as well as the mutex's
+wait_lock, and continue the loop again. On the next iteration of the
+loop, the previous owner of the mutex will be the task that will be
+processed.
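The loop just described can be condensed into a rough pseudo-C outline. This
is a sketch only: the helper names below are made up for illustration, and
the real rt_mutex_adjust_prio_chain also handles deadlock detection and the
maximum walk depth:

    /* Illustrative pseudo-code, not the kernel's actual implementation. */
    for (;;) {
            spin_lock(&task->pi_lock);

            waiter = task->blocked_on;              /* NULL: top of chain */
            if (!waiter || waiter != top_pi_waiter(task) ||
                waiter_prio(waiter) == task->prio)
                    break;                          /* nothing (more) to fix up */

            mutex = waiter->lock;
            if (!spin_trylock(&mutex->wait_lock)) {
                    /* Locking order is reversed here: drop and retry. */
                    spin_unlock(&task->pi_lock);
                    continue;
            }

            requeue_waiter(mutex, waiter);          /* new priority on wait_list */
            spin_unlock(&task->pi_lock);

            owner = mutex_owner(mutex);
            spin_lock(&owner->pi_lock);
            update_top_waiter(owner, waiter);       /* fix up owner's pi_list */

            spin_unlock(&owner->pi_lock);
            spin_unlock(&mutex->wait_lock);

            task = owner;                           /* walk up the chain */
    }
    spin_unlock(&task->pi_lock);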
+
+Note: One might think that the owner of this mutex might have changed
+      since we just grabbed the mutex's wait_lock. And one could be right.
+      The important thing to remember is that the owner could not have
+      become the task that is being processed in the PI chain, since
+      we have taken that task's pi_lock at the beginning of the loop.
+      So as long as there is an owner of this mutex that is not the same
+      process as the task being worked on, we are OK.
+
+      Looking closely at the code, one might be confused. The check for the
+      end of the PI chain is when the task isn't blocked on anything or the
+      task's waiter structure "task" element is NULL. This check is
+      protected only by the task's pi_lock. But the code to unlock the mutex
+      sets the task's waiter structure "task" element to NULL with only
+      the protection of the mutex's wait_lock, which was not taken yet.
+      Isn't this a race condition if the task becomes the new owner?
+
+      The answer is No! The trick is the spin_trylock of the mutex's
+      wait_lock. If we fail that lock, we release the pi_lock of the
+      task and continue the loop, doing the end of PI chain check again.
+
+      In the code to release the lock, the wait_lock of the mutex is held
+      the entire time, and it is not let go when we grab the pi_lock of the
+      new owner of the mutex. So if the switch of a new owner were to happen
+      after the check for end of the PI chain and the grabbing of the
+      wait_lock, the unlocking code would spin on the new owner's pi_lock
+      but never give up the wait_lock. So the PI chain loop is guaranteed to
+      fail the spin_trylock on the wait_lock, release the pi_lock, and
+      try again.
+
+      If you don't quite understand the above, that's OK. You don't have to,
+      unless you really want to make a proof out of it ;)
+
+
+Pending Owners and Lock stealing
+--------------------------------
+
+One of the flags in the owner field of the mutex structure is "Pending Owner".
+What this means is that an owner was chosen by the process releasing the
+mutex, but that owner has yet to wake up and actually take the mutex.
+
+Why is this important? Why can't we just give the mutex to another process
+and be done with it?
+
+The PI code is to help with real-time processes, and to let the highest
+priority process run as long as possible with as little latency and delay
+as possible. If a high priority process owns a mutex that a lower priority
+process is blocked on, when the mutex is released it would be given to the
+lower priority process. What if the higher priority process wants to take
+that mutex again? The high priority process would fail to take the mutex
+that it just gave up, and it would need to boost the lower priority process
+and wait for the full latency of that critical section (since the low
+priority process just entered it).
+
+There's no reason a high priority process that gives up a mutex should be
+penalized if it tries to take that mutex again. If the new owner of the
+mutex has not woken up yet, there's no reason that the higher priority process
+could not take that mutex away.
+
+To solve this, we introduced Pending Ownership and Lock Stealing. When a
+new process is given a mutex that it was blocked on, it is only given
+pending ownership. This means that it's the new owner, unless a higher
+priority process comes in and tries to grab that mutex. If a higher priority
+process does come along and wants that mutex, we let the higher priority
+process "steal" the mutex from the pending owner (only if it is still pending)
+and continue with the mutex.
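To picture the owner field's encoding, here is a small hedged sketch (macro
and helper names are illustrative; it assumes only the at-least-four-byte
alignment of the task structure stated earlier):

    #include <linux/types.h>

    struct task_struct;

    #define RT_MUTEX_OWNER_PENDING  0x1UL   /* bit 0: "Pending Owner" */
    #define RT_MUTEX_HAS_WAITERS    0x2UL   /* bit 1: "Has Waiters" */
    #define RT_MUTEX_FLAG_MASK      0x3UL

    /* Mask off the two flag bits to recover the task pointer. */
    static inline struct task_struct *owner_task(unsigned long owner_field)
    {
            return (struct task_struct *)(owner_field & ~RT_MUTEX_FLAG_MASK);
    }

    static inline bool owner_pending(unsigned long owner_field)
    {
            return owner_field & RT_MUTEX_OWNER_PENDING;
    }

    static inline bool has_waiters(unsigned long owner_field)
    {
            return owner_field & RT_MUTEX_HAS_WAITERS;
    }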
+
+
+Taking of a mutex (The walk through)
+------------------------------------
+
+OK, now let's take a look at the detailed walk through of what happens when
+taking a mutex.
+
+The first thing that is tried is the fast taking of the mutex. This is
+done when we have CMPXCHG enabled (otherwise the fast taking automatically
+fails). Only when the owner field of the mutex is NULL can the lock be
+taken with the CMPXCHG and nothing else needs to be done.
+
+If there is contention on the lock, whether it is owned or has a pending
+owner, we take the slow path (rt_mutex_slowlock).
+
+The slow path function is where the task's waiter structure is created on
+the stack. This is because the waiter structure is only needed for the
+scope of this function. The waiter structure holds the nodes to store
+the task on the wait_list of the mutex, and if need be, the pi_list of
+the owner.
+
+The wait_lock of the mutex is taken since the slow path of unlocking the
+mutex also takes this lock.
+
+We then call try_to_take_rt_mutex. This is where an architecture that
+does not implement CMPXCHG would always grab the lock (if there's no
+contention).
+
+try_to_take_rt_mutex is used every time the task tries to grab a mutex in the
+slow path. The first thing that is done here is an atomic setting of
+the "Has Waiters" flag of the mutex's owner field. Yes, this could really
+be false, because if the mutex has no owner, there are no waiters and
+the current task also won't have any waiters. But we don't have the lock
+yet, so we assume we are going to be a waiter. The reason for this is to
+play nice with those architectures that do have CMPXCHG. By setting this flag
+now, the owner of the mutex can't release the mutex without going into the
+slow unlock path, and it would then need to grab the wait_lock, which this
+code currently holds. So setting the "Has Waiters" flag forces the owner
+to synchronize with this code.
+
+Now that we know that we can't have any races with the owner releasing the
+mutex, we check to see if we can take the ownership. This is done if the
+mutex doesn't have an owner, or if we can steal the mutex from a pending
+owner. Let's look at the situations we have here.
+
+  1) Has owner that is pending
+  ----------------------------
+
+  The mutex has an owner, but it hasn't woken up and the mutex flag
+  "Pending Owner" is set. The first check is to see if the owner isn't the
+  current task. This is because this function is also used for the pending
+  owner to grab the mutex. When a pending owner wakes up, it checks to see
+  if it can take the mutex, and this is done if the owner is already set to
+  itself. If so, we succeed and leave the function, clearing the "Pending
+  Owner" bit.
+
+  If the pending owner is not current, we check to see if the current priority is
+  higher than the pending owner's. If not, we fail the function and return.
+
+  There's also something special about a pending owner. That is, a pending owner
+  is never blocked on a mutex. So there is no PI chain to worry about. It also
+  means that if the mutex doesn't have any waiters, there's no accounting needed
+  to update the pending owner's pi_list, since we only worry about processes
+  blocked on the current mutex.
+
+  If there are waiters on this mutex, and we just stole the ownership, we need
+  to take the top waiter, remove it from the pi_list of the pending owner, and
+  add it to the current pi_list. Note that at this moment, the pending owner
+  is no longer on the list of waiters.
This is fine, since the pending owner
+  would add itself back when it realizes that it had the ownership stolen
+  from itself. When the pending owner tries to grab the mutex, it will fail
+  in try_to_take_rt_mutex if the owner field points to another process.
+
+  2) No owner
+  -----------
+
+  If there is no owner (or we successfully stole the lock), we set the owner
+  of the mutex to current, and set the flag of "Has Waiters" if the current
+  mutex actually has waiters, or we clear the flag if it doesn't. See, it was
+  OK that we set that flag early, since now it is cleared.
+
+  3) Failed to grab ownership
+  ---------------------------
+
+  The most interesting case is when we fail to take ownership. This means that
+  there exists an owner, or there's a pending owner with equal or higher
+  priority than the current task.
+
+We'll continue on the failed case.
+
+If the mutex has a timeout, we set up a timer to go off to break us out
+of this mutex if we failed to get it after a specified amount of time.
+
+Now we enter a loop that will continue to try to take ownership of the mutex, or
+fail from a timeout or signal.
+
+Once again we try to take the mutex. This will usually fail the first time
+in the loop, since it had just failed to get the mutex. But the second time
+in the loop, this would likely succeed, since the task would likely be
+the pending owner.
+
+If the task's state is TASK_INTERRUPTIBLE, a check for signals and timeout
+is done here.
+
+The waiter structure has a "task" field that points to the task that is blocked
+on the mutex. This field can be NULL the first time it goes through the loop
+or if the task is a pending owner and had its mutex stolen. If the "task"
+field is NULL then we need to set up the accounting for it.
+
+Task blocks on mutex
+--------------------
+
+The accounting of a mutex and process is done with the waiter structure of
+the process. The "task" field is set to the process, and the "lock" field
+to the mutex. The plist nodes are initialized to the process's current
+priority.
+
+Since the wait_lock was taken at the entry of the slow lock, we can safely
+add the waiter to the wait_list. If the current process is the highest
+priority process currently waiting on this mutex, then we remove the
+previous top waiter process (if it exists) from the pi_list of the owner,
+and add the current process to that list. Since the pi_list of the owner
+has changed, we call rt_mutex_adjust_prio on the owner to see if the owner
+should adjust its priority accordingly.
+
+If the owner is also blocked on a lock, and had its pi_list changed
+(or deadlock checking is on), we unlock the wait_lock of the mutex and go ahead
+and run rt_mutex_adjust_prio_chain on the owner, as described earlier.
+
+Now all locks are released, and if the current process is still blocked on a
+mutex (waiter "task" field is not NULL), then we go to sleep (call schedule).
+
+Waking up in the loop
+---------------------
+
+The call to schedule can then wake up for a few reasons:
+
+  1) we were given pending ownership of the mutex.
+  2) we received a signal while TASK_INTERRUPTIBLE.
+  3) we had a timeout while TASK_INTERRUPTIBLE.
+
+In any of these cases, we continue the loop and once again try to grab the
+ownership of the mutex. If we succeed, we exit the loop. On a signal or
+timeout we also exit the loop; otherwise, if the mutex was stolen from us,
+we simply add ourselves back on the lists and go back to sleep.
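Putting the loop together, here is a heavily simplified pseudo-C sketch of
the slow path described above (illustrative only; the real rt_mutex_slowlock
also deals with the timer setup, waiter initialization details and wakeup
bookkeeping):

    /* Illustrative pseudo-code for the slowlock loop, not the actual source. */
    spin_lock(&mutex->wait_lock);

    ret = 0;
    for (;;) {
            if (try_to_take_rt_mutex(mutex))
                    break;                  /* got it (or stole it) */

            if (state == TASK_INTERRUPTIBLE &&
                (signal_pending(current) || timed_out)) {
                    ret = timed_out ? -ETIMEDOUT : -EINTR;
                    break;
            }

            if (!waiter.task)               /* first pass, or ownership stolen */
                    task_blocks_on_rt_mutex(mutex, &waiter);

            spin_unlock(&mutex->wait_lock);
            schedule();                     /* sleep until woken */
            spin_lock(&mutex->wait_lock);
    }

    spin_unlock(&mutex->wait_lock);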
+ +Note: For various reasons, because of timeout and signals, the steal mutex + algorithm needs to be careful. This is because the current process is + still on the wait_list. And because of dynamic changing of priorities, + especially on SCHED_OTHER tasks, the current process can be the + highest priority task on the wait_list. + +Failed to get mutex on Timeout or Signal +---------------------------------------- + +If a timeout or signal occurred, the waiter's "task" field would not be +NULL and the task needs to be taken off the wait_list of the mutex and perhaps +pi_list of the owner. If this process was a high priority process, then +the rt_mutex_adjust_prio_chain needs to be executed again on the owner, +but this time it will be lowering the priorities. + + +Unlocking the Mutex +------------------- + +The unlocking of a mutex also has a fast path for those architectures with +CMPXCHG. Since the taking of a mutex on contention always sets the +"Has Waiters" flag of the mutex's owner, we use this to know if we need to +take the slow path when unlocking the mutex. If the mutex doesn't have any +waiters, the owner field of the mutex would equal the current process and +the mutex can be unlocked by just replacing the owner field with NULL. + +If the owner field has the "Has Waiters" bit set (or CMPXCHG is not available), +the slow unlock path is taken. + +The first thing done in the slow unlock path is to take the wait_lock of the +mutex. This synchronizes the locking and unlocking of the mutex. + +A check is made to see if the mutex has waiters or not. On architectures that +do not have CMPXCHG, this is the location that the owner of the mutex will +determine if a waiter needs to be awoken or not. On architectures that +do have CMPXCHG, that check is done in the fast path, but it is still needed +in the slow path too. If a waiter of a mutex woke up because of a signal +or timeout between the time the owner failed the fast path CMPXCHG check and +the grabbing of the wait_lock, the mutex may not have any waiters, thus the +owner still needs to make this check. If there are no waiters then the mutex +owner field is set to NULL, the wait_lock is released and nothing more is +needed. + +If there are waiters, then we need to wake one up and give that waiter +pending ownership. + +On the wake up code, the pi_lock of the current owner is taken. The top +waiter of the lock is found and removed from the wait_list of the mutex +as well as the pi_list of the current owner. The task field of the new +pending owner's waiter structure is set to NULL, and the owner field of the +mutex is set to the new owner with the "Pending Owner" bit set, as well +as the "Has Waiters" bit if there still are other processes blocked on the +mutex. + +The pi_lock of the previous owner is released, and the new pending owner's +pi_lock is taken. Remember that this is the trick to prevent the race +condition in rt_mutex_adjust_prio_chain from adding itself as a waiter +on the mutex. + +We now clear the "pi_blocked_on" field of the new pending owner, and if +the mutex still has waiters pending, we add the new top waiter to the pi_list +of the pending owner. + +Finally we unlock the pi_lock of the pending owner and wake it up. 
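On architectures with CMPXCHG, the fast/slow split of the unlock path can be
pictured with a short hedged sketch (illustrative names; the release succeeds
on the fast path only if the owner field is exactly the current task, i.e.
no flag bits are set):

    /* Illustrative sketch of the unlock fast path, not the actual source. */
    static inline void rt_mutex_unlock_sketch(struct rt_mutex *lock)
    {
            /* Fails if the "Has Waiters" (or "Pending Owner") bit is set. */
            if (rt_mutex_cmpxchg(lock, current, NULL))
                    return;                 /* no waiters: done */

            /* Wake the top waiter and make it the pending owner. */
            rt_mutex_slowunlock(lock);
    }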
+
+
+Contact
+-------
+
+For updates on this document, please email Steven Rostedt
+
+
+Credits
+-------
+
+Author: Steven Rostedt
+
+Reviewers: Ingo Molnar, Thomas Gleixner, Thomas Duetsch, and Randy Dunlap
+
+Updates
+-------
+
+This document was originally written for 2.6.17-rc3-mm1
diff --git a/Documentation/locking/rt-mutex.txt b/Documentation/locking/rt-mutex.txt
new file mode 100644
index 000000000000..243393d882ee
--- /dev/null
+++ b/Documentation/locking/rt-mutex.txt
@@ -0,0 +1,79 @@
+RT-mutex subsystem with PI support
+----------------------------------
+
+RT-mutexes with priority inheritance are used to support PI-futexes,
+which enable pthread_mutex_t priority inheritance attributes
+(PTHREAD_PRIO_INHERIT). [See Documentation/pi-futex.txt for more details
+about PI-futexes.]
+
+This technology was developed in the -rt tree and streamlined for
+pthread_mutex support.
+
+Basic principles:
+-----------------
+
+RT-mutexes extend the semantics of simple mutexes by the priority
+inheritance protocol.
+
+A low priority owner of a rt-mutex inherits the priority of a higher
+priority waiter until the rt-mutex is released. If the temporarily
+boosted owner blocks on an rt-mutex itself, it propagates the priority
+boosting to the owner of the other rt_mutex it gets blocked on. The
+priority boosting is immediately removed once the rt_mutex has been
+unlocked.
+
+This approach allows us to shorten the block of high-prio tasks on
+mutexes which protect shared resources. Priority inheritance is not a
+magic bullet for poorly designed applications, but it allows
+well-designed applications to use userspace locks in critical parts of
+a high priority thread, without losing determinism.
+
+The enqueueing of the waiters into the rtmutex waiter list is done in
+priority order. For same priorities FIFO order is chosen. For each
+rtmutex, only the top priority waiter is enqueued into the owner's
+priority waiters list. This list too queues in priority order. Whenever
+the top priority waiter of a task changes (for example it timed out or
+got a signal), the priority of the owner task is readjusted. [The
+priority enqueueing is handled by "plists", see include/linux/plist.h
+for more details.]
+
+RT-mutexes are optimized for fastpath operations and have no internal
+locking overhead when locking an uncontended mutex or unlocking a mutex
+without waiters. The optimized fastpath operations require cmpxchg
+support. [If that is not available then the rt-mutex internal spinlock
+is used]
+
+The state of the rt-mutex is tracked via the owner field of the rt-mutex
+structure:
+
+rt_mutex->owner holds the task_struct pointer of the owner. Bits 0 and 1
+are used to keep track of the "owner is pending" and "rtmutex has
+waiters" state.
+
+ owner          bit1    bit0
+ NULL           0       0       mutex is free (fast acquire possible)
+ NULL           0       1       invalid state
+ NULL           1       0       Transitional state*
+ NULL           1       1       invalid state
+ taskpointer    0       0       mutex is held (fast release possible)
+ taskpointer    0       1       task is pending owner
+ taskpointer    1       0       mutex is held and has waiters
+ taskpointer    1       1       task is pending owner and mutex has waiters
+
+Pending-ownership handling is a performance optimization:
+pending-ownership is assigned to the first (highest priority) waiter of
+the mutex, when the mutex is released. The thread is woken up and once
+it starts executing it can acquire the mutex. Until the mutex is taken
+by it (bit 0 is cleared) a competing higher priority thread can "steal"
+the mutex which puts the woken up thread back on the waiters list.
diff --git a/Documentation/locking/spinlocks.txt b/Documentation/locking/spinlocks.txt
new file mode 100644
index 000000000000..ff35e40bdf5b
--- /dev/null
+++ b/Documentation/locking/spinlocks.txt
@@ -0,0 +1,167 @@
+Lesson 1: Spin locks
+
+The most basic primitive for locking is the spinlock.
+
+static DEFINE_SPINLOCK(xxx_lock);
+
+	unsigned long flags;
+
+	spin_lock_irqsave(&xxx_lock, flags);
+	... critical section here ..
+	spin_unlock_irqrestore(&xxx_lock, flags);
+
+The above is always safe. It will disable interrupts _locally_, but the
+spinlock itself will guarantee the global lock, so it will guarantee that
+there is only one thread-of-control within the region(s) protected by that
+lock. This works well even on UP, so the code does _not_ need to
+worry about UP vs SMP issues: the spinlocks work correctly under both.
+
+   NOTE! Implications of spin_locks for memory are further described in:
+
+     Documentation/memory-barriers.txt
+       (5) LOCK operations.
+       (6) UNLOCK operations.
+
+The above is usually pretty simple (you usually need and want only one
+spinlock for most things - using more than one spinlock can make things a
+lot more complex and even slower and is usually worth it only for
+sequences that you _know_ need to be split up: avoid it at all cost if you
+aren't sure).
+
+This is the only really hard part about spinlocks: once you start
+using spinlocks they tend to expand to areas you might not have noticed
+before, because you have to make sure the spinlocks correctly protect the
+shared data structures _everywhere_ they are used. The spinlocks are most
+easily added to places that are completely independent of other code (for
+example, internal driver data structures that nobody else ever touches).
+
+   NOTE! The spin-lock is safe only when you _also_ use the lock itself
+   to do locking across CPU's, which implies that EVERYTHING that
+   touches a shared variable has to agree about the spinlock they want
+   to use.
+
+----
+
+Lesson 2: reader-writer spinlocks.
+
+If your data accesses have a very natural pattern where you usually tend
+to mostly read from the shared variables, the reader-writer locks
+(rw_lock) versions of the spinlocks are sometimes useful. They allow multiple
+readers to be in the same critical region at once, but if somebody wants
+to change the variables it has to get an exclusive write lock.
+
+   NOTE! reader-writer locks require more atomic memory operations than
+   simple spinlocks. Unless the reader critical section is long, you
+   are better off just using spinlocks.
+
+The routines look the same as above:
+
+   rwlock_t xxx_lock = __RW_LOCK_UNLOCKED(xxx_lock);
+
+	unsigned long flags;
+
+	read_lock_irqsave(&xxx_lock, flags);
+	.. critical section that only reads the info ...
+	read_unlock_irqrestore(&xxx_lock, flags);
+
+	write_lock_irqsave(&xxx_lock, flags);
+	.. read and write exclusive access to the info ...
+	write_unlock_irqrestore(&xxx_lock, flags);
+
+The above kind of lock may be useful for complex data structures like
+linked lists, especially searching for entries without changing the list
+itself. The read lock allows many concurrent readers. Anything that
+_changes_ the list will have to get the write lock.
+
+   NOTE! RCU is better for list traversal, but requires careful
+   attention to design detail (see Documentation/RCU/listRCU.txt).
+
+Also, you cannot "upgrade" a read-lock to a write-lock, so if you at _any_
+time need to do any changes (even if you don't do it every time), you have
+to get the write-lock at the very beginning.
+
+   NOTE! We are working hard to remove reader-writer spinlocks in most
+   cases, so please don't add a new one without consensus. (Instead, see
+   Documentation/RCU/rcu.txt for complete information.)
+
+----
+
+Lesson 3: spinlocks revisited.
+
+The single spin-lock primitives above are by no means the only ones. They
+are the safest ones, and the ones that work under all circumstances,
+but partly _because_ they are safe they are also fairly slow. They are slower
+than they'd need to be, because they do have to disable interrupts
+(which is just a single instruction on x86, but it's an expensive one -
+and on other architectures it can be worse).
+
+If you have a case where you have to protect a data structure across
+several CPU's and you want to use spinlocks you can potentially use
+cheaper versions of the spinlocks. IFF you know that the spinlocks are
+never used in interrupt handlers, you can use the non-irq versions:
+
+	spin_lock(&lock);
+	...
+	spin_unlock(&lock);
+
+(and the equivalent read-write versions too, of course). The spinlock will
+guarantee the same kind of exclusive access, and it will be much faster.
+This is useful if you know that the data in question is only ever
+manipulated from a "process context", ie no interrupts involved.
+
+The reason you mustn't use these versions if you have interrupts that
+play with the spinlock is that you can get deadlocks:
+
+	spin_lock(&lock);
+	...
+		<- interrupt comes in:
+			spin_lock(&lock);
+
+where an interrupt tries to lock an already locked variable. This is ok if
+the interrupt happens on another CPU, but it is _not_ ok if the
+interrupt happens on the same CPU that already holds the lock, because the
+lock will obviously never be released (because the interrupt is waiting
+for the lock, and the lock-holder is interrupted by the interrupt and will
+not continue until the interrupt has been processed).
+
+(This is also the reason why the irq-versions of the spinlocks only need
+to disable the _local_ interrupts - it's ok to use spinlocks in interrupts
+on other CPU's, because an interrupt on another CPU doesn't interrupt the
+CPU that holds the lock, so the lock-holder can continue and eventually
+release the lock).
+
+Note that you can be clever with read-write locks and interrupts. For
+example, if you know that the interrupt only ever gets a read-lock, then
+you can use a non-irq version of read locks everywhere - because they
+don't block on each other (and thus there is no dead-lock wrt interrupts).
+But when you do the write-lock, you have to use the irq-safe version.
+
+For an example of being clever with rw-locks, see the "waitqueue_lock"
+handling in kernel/sched/core.c - nothing ever _changes_ a wait-queue from
+within an interrupt; interrupts only read the queue in order to know whom to
+wake up. So read-locks are safe (which is good: they are very common
+indeed), while write-locks need to protect themselves against interrupts.
+
+	Linus
+
+----
+
+Reference information:
+
+For dynamic initialization, use spin_lock_init() or rwlock_init() as
+appropriate:
+
+   spinlock_t xxx_lock;
+   rwlock_t xxx_rw_lock;
+
+   static int __init xxx_init(void)
+   {
+	spin_lock_init(&xxx_lock);
+	rwlock_init(&xxx_rw_lock);
+	...
+   }
+
+   module_init(xxx_init);
+
+For static initialization, use DEFINE_SPINLOCK() / DEFINE_RWLOCK() or
+__SPIN_LOCK_UNLOCKED() / __RW_LOCK_UNLOCKED() as appropriate.
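+
+As a worked example of Lesson 3's read-write trick, consider a sketch in
+kernel style (illustrative only: the lock, data and function names are
+made up):
+
+	#include <linux/spinlock.h>
+	#include <linux/interrupt.h>
+	#include <linux/printk.h>
+
+	static DEFINE_RWLOCK(stats_lock);
+	static unsigned long stats_count;
+
+	/* The interrupt only ever reads, so the non-irq read lock is
+	 * fine here: readers do not block each other. */
+	static irqreturn_t xxx_irq(int irq, void *dev)
+	{
+		unsigned long snapshot;
+
+		read_lock(&stats_lock);
+		snapshot = stats_count;
+		read_unlock(&stats_lock);
+
+		pr_debug("xxx: count %lu\n", snapshot);
+		return IRQ_HANDLED;
+	}
+
+	/* The writer runs in process context and must use the irq-safe
+	 * version: if the interrupt took the read lock on this CPU
+	 * while we held the write lock, we would deadlock. */
+	static void xxx_update(unsigned long value)
+	{
+		unsigned long flags;
+
+		write_lock_irqsave(&stats_lock, flags);
+		stats_count = value;
+		write_unlock_irqrestore(&stats_lock, flags);
+	}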
diff --git a/Documentation/locking/ww-mutex-design.txt b/Documentation/locking/ww-mutex-design.txt
new file mode 100644
index 000000000000..8a112dc304c3
--- /dev/null
+++ b/Documentation/locking/ww-mutex-design.txt
@@ -0,0 +1,344 @@
+Wait/Wound Deadlock-Proof Mutex Design
+======================================
+
+Please read mutex-design.txt first, as it applies to wait/wound mutexes too.
+
+Motivation for WW-Mutexes
+-------------------------
+
+GPUs do operations that commonly involve many buffers. Those buffers
+can be shared across contexts/processes, exist in different memory
+domains (for example VRAM vs system memory), and so on. And with
+PRIME / dmabuf, they can even be shared across devices. So there are
+a handful of situations where the driver needs to wait for buffers to
+become ready. If you think about this in terms of waiting on a buffer
+mutex for it to become available, this presents a problem because
+there is no way to guarantee that buffers appear in an execbuf/batch in
+the same order in all contexts. That is directly under control of
+userspace, and a result of the sequence of GL calls that an application
+makes, which results in the potential for deadlock. The problem gets
+more complex when you consider that the kernel may need to migrate the
+buffer(s) into VRAM before the GPU operates on the buffer(s), which
+may in turn require evicting some other buffers (and you don't want to
+evict other buffers which are already queued up to the GPU), but for a
+simplified understanding of the problem you can ignore this.
+
+The algorithm that the TTM graphics subsystem came up with for dealing with
+this problem is quite simple. For each group of buffers (execbuf) that need
+to be locked, the caller would be assigned a unique reservation id/ticket,
+from a global counter. In case of deadlock while locking all the buffers
+associated with an execbuf, the one with the lowest reservation ticket (i.e.
+the oldest task) wins, and the one with the higher reservation id (i.e. the
+younger task) unlocks all of the buffers that it has already locked, and then
+tries again.
+
+In the RDBMS literature this deadlock handling approach is called wait/wound:
+The older task waits until it can acquire the contended lock. The younger task
+needs to back off and drop all the locks it is currently holding, i.e. the
+younger task is wounded.
+
+Concepts
+--------
+
+Compared to normal mutexes two additional concepts/objects show up in the lock
+interface for w/w mutexes:
+
+Acquire context: To ensure eventual forward progress it is important that a task
+trying to acquire locks doesn't grab a new reservation id, but keeps the one it
+acquired when starting the lock acquisition. This ticket is stored in the
+acquire context. Furthermore the acquire context keeps track of debugging state
+to catch w/w mutex interface abuse.
+
+W/w class: In contrast to normal mutexes the lock class needs to be explicit for
+w/w mutexes, since it is required to initialize the acquire context.
+
+Furthermore there are three different classes of w/w lock acquire functions:
+
+* Normal lock acquisition with a context, using ww_mutex_lock.
+
+* Slowpath lock acquisition on the contending lock, used by the wounded task
+  after having dropped all already acquired locks. These functions have the
+  _slow postfix.
+
+  From a simple semantics point-of-view the _slow functions are not strictly
+  required, since simply calling the normal ww_mutex_lock functions on the
+  contending lock (after having dropped all other already acquired locks) will
+  work correctly. After all if no other ww mutex has been acquired yet there's
+  no deadlock potential and hence the ww_mutex_lock call will block and not
+  prematurely return -EDEADLK. The advantage of the _slow functions is in
+  interface safety:
+  - ww_mutex_lock has a __must_check int return type, whereas ww_mutex_lock_slow
+    has a void return type. Note that since ww mutex code needs loops/retries
+    anyway the __must_check doesn't result in spurious warnings, even though the
+    very first lock operation can never fail.
+  - When full debugging is enabled ww_mutex_lock_slow checks that all acquired
+    ww mutexes have been released (preventing deadlocks) and makes sure that we
+    block on the contending lock (preventing spinning through the -EDEADLK
+    slowpath until the contended lock can be acquired).
+
+* Functions to only acquire a single w/w mutex, which results in the exact same
+  semantics as a normal mutex. This is done by calling ww_mutex_lock with a NULL
+  context.
+
+  Again this is not strictly required. But often you only want to acquire a
+  single lock in which case it's pointless to set up an acquire context (and so
+  better to avoid grabbing a deadlock avoidance ticket).
+
+Of course, all the usual variants for handling wake-ups due to signals are also
+provided.
+
+Usage
+-----
+
+Three different ways to acquire locks within the same w/w class. Common
+definitions for methods #1 and #2:
+
+static DEFINE_WW_CLASS(ww_class);
+
+struct obj {
+	struct ww_mutex lock;
+	/* obj data */
+};
+
+struct obj_entry {
+	struct list_head head;
+	struct obj *obj;
+};
+
+Method 1, using a list in execbuf->buffers that's not allowed to be reordered.
+This is useful if a list of required objects is already tracked somewhere.
+Furthermore the lock helper can propagate the -EALREADY return code back to
+the caller as a signal that an object appears twice on the list. This is useful
+if the list is constructed from userspace input and the ABI requires userspace
+to not have duplicate entries (e.g. for a gpu commandbuffer submission ioctl).
+
+int lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
+{
+	struct obj *res_obj = NULL;
+	struct obj_entry *contended_entry = NULL;
+	struct obj_entry *entry;
+	int ret;
+
+	ww_acquire_init(ctx, &ww_class);
+
+retry:
+	list_for_each_entry (entry, list, head) {
+		if (entry->obj == res_obj) {
+			res_obj = NULL;
+			continue;
+		}
+		ret = ww_mutex_lock(&entry->obj->lock, ctx);
+		if (ret < 0) {
+			contended_entry = entry;
+			goto err;
+		}
+	}
+
+	ww_acquire_done(ctx);
+	return 0;
+
+err:
+	list_for_each_entry_continue_reverse (entry, list, head)
+		ww_mutex_unlock(&entry->obj->lock);
+
+	if (res_obj)
+		ww_mutex_unlock(&res_obj->lock);
+
+	if (ret == -EDEADLK) {
+		/* we lost out in a seqno race, lock and retry.. */
+		ww_mutex_lock_slow(&contended_entry->obj->lock, ctx);
+		res_obj = contended_entry->obj;
+		goto retry;
+	}
+	ww_acquire_fini(ctx);
+
+	return ret;
+}
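+
+As a usage sketch, a hypothetical caller would drive the helper like this
+(the acquire context lives on the caller's stack across the whole
+lock/operate/unlock cycle; unlock_objs is the helper shown after method 2
+below):
+
+int submit_buffers(struct list_head *buffers)
+{
+	struct ww_acquire_ctx ctx;
+	int ret;
+
+	ret = lock_objs(buffers, &ctx);
+	if (ret)
+		return ret;
+
+	/* ... operate on the locked buffers ... */
+
+	unlock_objs(buffers, &ctx);
+	return 0;
+}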
+
+Method 2, using a list in execbuf->buffers that can be reordered. The same
+-EALREADY duplicate entry detection as in method 1 above applies. But the
+list-reordering allows for a bit more idiomatic code.
+
+int lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
+{
+	struct obj_entry *entry, *entry2;
+	int ret;
+
+	ww_acquire_init(ctx, &ww_class);
+
+	list_for_each_entry (entry, list, head) {
+		ret = ww_mutex_lock(&entry->obj->lock, ctx);
+		if (ret < 0) {
+			entry2 = entry;
+
+			list_for_each_entry_continue_reverse (entry2, list, head)
+				ww_mutex_unlock(&entry2->obj->lock);
+
+			if (ret != -EDEADLK) {
+				ww_acquire_fini(ctx);
+				return ret;
+			}
+
+			/* we lost out in a seqno race, lock and retry.. */
+			ww_mutex_lock_slow(&entry->obj->lock, ctx);
+
+			/*
+			 * Move buf to head of the list, this will point
+			 * buf->next to the first unlocked entry,
+			 * restarting the for loop.
+			 */
+			list_del(&entry->head);
+			list_add(&entry->head, list);
+		}
+	}
+
+	ww_acquire_done(ctx);
+	return 0;
+}
+
+Unlocking works the same way for both methods #1 and #2:
+
+void unlock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
+{
+	struct obj_entry *entry;
+
+	list_for_each_entry (entry, list, head)
+		ww_mutex_unlock(&entry->obj->lock);
+
+	ww_acquire_fini(ctx);
+}
+
+Method 3 is useful if the list of objects is constructed ad-hoc and not upfront,
+e.g. when adjusting edges in a graph where each node has its own ww_mutex lock,
+and edges can only be changed when holding the locks of all involved nodes. w/w
+mutexes are a natural fit for such a case for two reasons:
+- They can handle lock-acquisition in any order, which allows us to start
+  walking a graph from a starting point and then iteratively discovering new
+  edges and locking down the nodes those edges connect to.
+- Due to the -EALREADY return code signalling that a given object is already
+  held, there's no need for additional book-keeping to break cycles in the
+  graph or keep track of which locks are already held (when using more than
+  one node as a starting point).
+
+Note that this approach differs in two important ways from the above methods:
+- Since the list of objects is dynamically constructed (and might very well be
+  different when retrying due to hitting the -EDEADLK wound condition) there's
+  no need to keep any object on a persistent list when it's not locked. We can
+  therefore move the list_head into the object itself.
+- On the other hand the dynamic object list construction also means that the
+  -EALREADY return code can't be propagated.
+
+Note also that methods #1 and #2 can be combined with method #3, e.g. to first
+lock a list of starting nodes (passed in from userspace) using one of the above
+methods, and then lock any additional objects affected by the operations using
+method #3. The backoff/retry procedure will be a bit more involved, since
+when the dynamic locking step hits -EDEADLK we also need to unlock all the
+objects acquired with the fixed list. But the w/w mutex debug checks will catch
+any interface misuse for these cases.
+
+Also, method 3 can't fail the lock acquisition step since it doesn't return
+-EALREADY. Of course this would be different when using the _interruptible
+variants, but that's outside of the scope of these examples here.
+
+struct obj {
+	struct ww_mutex ww_mutex;
+	struct list_head locked_list;
+};
+
+static DEFINE_WW_CLASS(ww_class);
+
+void __unlock_objs(struct list_head *list)
+{
+	struct obj *entry, *temp;
+
+	list_for_each_entry_safe (entry, temp, list, locked_list) {
+		/* need to do that before unlocking, since only the current
+		 * lock holder is allowed to use the object */
+		list_del(&entry->locked_list);
+		ww_mutex_unlock(&entry->ww_mutex);
+	}
+}
+
+void lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
+{
+	struct obj *obj;
+	int ret;
+
+	ww_acquire_init(ctx, &ww_class);
+
+retry:
+	/* re-init loop start state */
+	for (;;) {
+		/* magic code which walks over a graph, picks the next
+		 * object to lock as obj and breaks out of the loop once
+		 * all involved nodes are locked */
+
+		ret = ww_mutex_lock(&obj->ww_mutex, ctx);
+		if (ret == -EALREADY) {
+			/* we have that one already, get to the next object */
+			continue;
+		}
+		if (ret == -EDEADLK) {
+			__unlock_objs(list);
+
+			ww_mutex_lock_slow(&obj->ww_mutex, ctx);
+			list_add(&obj->locked_list, list);
+			goto retry;
+		}
+
+		/* locked a new object, add it to the list */
+		list_add_tail(&obj->locked_list, list);
+	}
+
+	ww_acquire_done(ctx);
+}
+
+void unlock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
+{
+	__unlock_objs(list);
+	ww_acquire_fini(ctx);
+}
+
+Method 4: Only lock a single object. In that case deadlock detection and
+prevention is obviously overkill, since with grabbing just one lock you can't
+produce a deadlock within just one class. To simplify this case the w/w mutex
+api can be used with a NULL context; see the sketch in the appendix below.
+
+Implementation Details
+----------------------
+
+Design:
+  ww_mutex currently encapsulates a struct mutex, which means no extra overhead
+  for normal mutex locks, which are far more common. As such there is only a
+  small increase in code size if wait/wound mutexes are not used.
+
+  In general, not much contention is expected. The locks are typically used to
+  serialize access to resources for devices. The only way to make wakeups
+  smarter would be at the cost of adding a field to struct mutex_waiter. This
+  would add overhead to all cases where normal mutexes are used, and
+  ww_mutexes are generally less performance sensitive.
+
+Lockdep:
+  Special care has been taken to warn for as many cases of api abuse
+  as possible. Some common api abuses will be caught with
+  CONFIG_DEBUG_MUTEXES, but CONFIG_PROVE_LOCKING is recommended.
+
+  Some of the errors which will be warned about:
+   - Forgetting to call ww_acquire_fini or ww_acquire_init.
+   - Attempting to lock more mutexes after ww_acquire_done.
+   - Attempting to lock the wrong mutex after -EDEADLK and
+     unlocking all mutexes.
+   - Attempting to lock the right mutex after -EDEADLK,
+     before unlocking all mutexes.
+
+   - Calling ww_mutex_lock_slow before -EDEADLK was returned.
+
+   - Unlocking mutexes with the wrong unlock function.
+   - Calling one of the ww_acquire_* twice on the same context.
+   - Using a different ww_class for the mutex than for the ww_acquire_ctx.
+   - Normal lockdep errors that can result in deadlocks.
+
+  Some of the lockdep errors that can result in deadlocks:
+   - Calling ww_acquire_init to initialize a second ww_acquire_ctx before
+     having called ww_acquire_fini on the first.
+   - 'normal' deadlocks that can occur.
+
+FIXME: Update this section once we have the TASK_DEADLOCK task state flag magic
+implemented.
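+
+Appendix: method 4 in code
+--------------------------
+
+A minimal sketch of method 4, reusing the struct obj definition from
+method 3 above (illustrative only; lock_single and unlock_single are
+made-up helper names):
+
+int lock_single(struct obj *obj)
+{
+	/* NULL context: plain mutex semantics, no wait/wound handling */
+	return ww_mutex_lock(&obj->ww_mutex, NULL);
+}
+
+void unlock_single(struct obj *obj)
+{
+	ww_mutex_unlock(&obj->ww_mutex);
+}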
diff --git a/Documentation/lockstat.txt b/Documentation/lockstat.txt deleted file mode 100644 index 72d010689751..000000000000 --- a/Documentation/lockstat.txt +++ /dev/null @@ -1,178 +0,0 @@ - -LOCK STATISTICS - -- WHAT - -As the name suggests, it provides statistics on locks. - -- WHY - -Because things like lock contention can severely impact performance. - -- HOW - -Lockdep already has hooks in the lock functions and maps lock instances to -lock classes. We build on that (see Documentation/lockdep-design.txt). -The graph below shows the relation between the lock functions and the various -hooks therein. - - __acquire - | - lock _____ - | \ - | __contended - | | - | - | _______/ - |/ - | - __acquired - | - . - - . - | - __release - | - unlock - -lock, unlock - the regular lock functions -__* - the hooks -<> - states - -With these hooks we provide the following statistics: - - con-bounces - number of lock contention that involved x-cpu data - contentions - number of lock acquisitions that had to wait - wait time min - shortest (non-0) time we ever had to wait for a lock - max - longest time we ever had to wait for a lock - total - total time we spend waiting on this lock - avg - average time spent waiting on this lock - acq-bounces - number of lock acquisitions that involved x-cpu data - acquisitions - number of times we took the lock - hold time min - shortest (non-0) time we ever held the lock - max - longest time we ever held the lock - total - total time this lock was held - avg - average time this lock was held - -These numbers are gathered per lock class, per read/write state (when -applicable). - -It also tracks 4 contention points per class. A contention point is a call site -that had to wait on lock acquisition. - - - CONFIGURATION - -Lock statistics are enabled via CONFIG_LOCK_STAT. 
- - - USAGE - -Enable collection of statistics: - -# echo 1 >/proc/sys/kernel/lock_stat - -Disable collection of statistics: - -# echo 0 >/proc/sys/kernel/lock_stat - -Look at the current lock statistics: - -( line numbers not part of actual output, done for clarity in the explanation - below ) - -# less /proc/lock_stat - -01 lock_stat version 0.4 -02----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -03 class name con-bounces contentions waittime-min waittime-max waittime-total waittime-avg acq-bounces acquisitions holdtime-min holdtime-max holdtime-total holdtime-avg -04----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- -05 -06 &mm->mmap_sem-W: 46 84 0.26 939.10 16371.53 194.90 47291 2922365 0.16 2220301.69 17464026916.32 5975.99 -07 &mm->mmap_sem-R: 37 100 1.31 299502.61 325629.52 3256.30 212344 34316685 0.10 7744.91 95016910.20 2.77 -08 --------------- -09 &mm->mmap_sem 1 [] khugepaged_scan_mm_slot+0x57/0x280 -19 &mm->mmap_sem 96 [] __do_page_fault+0x1d4/0x510 -11 &mm->mmap_sem 34 [] vm_mmap_pgoff+0x87/0xd0 -12 &mm->mmap_sem 17 [] vm_munmap+0x41/0x80 -13 --------------- -14 &mm->mmap_sem 1 [] dup_mmap+0x2a/0x3f0 -15 &mm->mmap_sem 60 [] SyS_mprotect+0xe9/0x250 -16 &mm->mmap_sem 41 [] __do_page_fault+0x1d4/0x510 -17 &mm->mmap_sem 68 [] vm_mmap_pgoff+0x87/0xd0 -18 -19............................................................................................................................................................................................................................. -20 -21 unix_table_lock: 110 112 0.21 49.24 163.91 1.46 21094 66312 0.12 624.42 31589.81 0.48 -22 --------------- -23 unix_table_lock 45 [] unix_create1+0x16e/0x1b0 -24 unix_table_lock 47 [] unix_release_sock+0x31/0x250 -25 unix_table_lock 15 [] unix_find_other+0x117/0x230 -26 unix_table_lock 5 [] unix_autobind+0x11f/0x1b0 -27 --------------- -28 unix_table_lock 39 [] unix_release_sock+0x31/0x250 -29 unix_table_lock 49 [] unix_create1+0x16e/0x1b0 -30 unix_table_lock 20 [] unix_find_other+0x117/0x230 -31 unix_table_lock 4 [] unix_autobind+0x11f/0x1b0 - - -This excerpt shows the first two lock class statistics. Line 01 shows the -output version - each time the format changes this will be updated. Line 02-04 -show the header with column descriptions. Lines 05-18 and 20-31 show the actual -statistics. These statistics come in two parts; the actual stats separated by a -short separator (line 08, 13) from the contention points. - -The first lock (05-18) is a read/write lock, and shows two lines above the -short separator. The contention points don't match the column descriptors, -they have two: contentions and [] symbol. The second set of contention -points are the points we're contending with. - -The integer part of the time values is in us. - -Dealing with nested locks, subclasses may appear: - -32........................................................................................................................................................................................................................... 
-33 -34 &rq->lock: 13128 13128 0.43 190.53 103881.26 7.91 97454 3453404 0.00 401.11 13224683.11 3.82 -35 --------- -36 &rq->lock 645 [] task_rq_lock+0x43/0x75 -37 &rq->lock 297 [] try_to_wake_up+0x127/0x25a -38 &rq->lock 360 [] select_task_rq_fair+0x1f0/0x74a -39 &rq->lock 428 [] scheduler_tick+0x46/0x1fb -40 --------- -41 &rq->lock 77 [] task_rq_lock+0x43/0x75 -42 &rq->lock 174 [] try_to_wake_up+0x127/0x25a -43 &rq->lock 4715 [] double_rq_lock+0x42/0x54 -44 &rq->lock 893 [] schedule+0x157/0x7b8 -45 -46........................................................................................................................................................................................................................... -47 -48 &rq->lock/1: 1526 11488 0.33 388.73 136294.31 11.86 21461 38404 0.00 37.93 109388.53 2.84 -49 ----------- -50 &rq->lock/1 11526 [] double_rq_lock+0x4f/0x54 -51 ----------- -52 &rq->lock/1 5645 [] double_rq_lock+0x42/0x54 -53 &rq->lock/1 1224 [] schedule+0x157/0x7b8 -54 &rq->lock/1 4336 [] double_rq_lock+0x4f/0x54 -55 &rq->lock/1 181 [] try_to_wake_up+0x127/0x25a - -Line 48 shows statistics for the second subclass (/1) of &rq->lock class -(subclass starts from 0), since in this case, as line 50 suggests, -double_rq_lock actually acquires a nested lock of two spinlocks. - -View the top contending locks: - -# grep : /proc/lock_stat | head - clockevents_lock: 2926159 2947636 0.15 46882.81 1784540466.34 605.41 3381345 3879161 0.00 2260.97 53178395.68 13.71 - tick_broadcast_lock: 346460 346717 0.18 2257.43 39364622.71 113.54 3642919 4242696 0.00 2263.79 49173646.60 11.59 - &mapping->i_mmap_mutex: 203896 203899 3.36 645530.05 31767507988.39 155800.21 3361776 8893984 0.17 2254.15 14110121.02 1.59 - &rq->lock: 135014 136909 0.18 606.09 842160.68 6.15 1540728 10436146 0.00 728.72 17606683.41 1.69 - &(&zone->lru_lock)->rlock: 93000 94934 0.16 59.18 188253.78 1.98 1199912 3809894 0.15 391.40 3559518.81 0.93 - tasklist_lock-W: 40667 41130 0.23 1189.42 428980.51 10.43 270278 510106 0.16 653.51 3939674.91 7.72 - tasklist_lock-R: 21298 21305 0.20 1310.05 215511.12 10.12 186204 241258 0.14 1162.33 1179779.23 4.89 - rcu_node_1: 47656 49022 0.16 635.41 193616.41 3.95 844888 1865423 0.00 764.26 1656226.96 0.89 - &(&dentry->d_lockref.lock)->rlock: 39791 40179 0.15 1302.08 88851.96 2.21 2790851 12527025 0.10 1910.75 3379714.27 0.27 - rcu_node_0: 29203 30064 0.16 786.55 1555573.00 51.74 88963 244254 0.00 398.87 428872.51 1.76 - -Clear the statistics: - -# echo 0 > /proc/lock_stat diff --git a/Documentation/mutex-design.txt b/Documentation/mutex-design.txt deleted file mode 100644 index ee231ed09ec6..000000000000 --- a/Documentation/mutex-design.txt +++ /dev/null @@ -1,157 +0,0 @@ -Generic Mutex Subsystem - -started by Ingo Molnar -updated by Davidlohr Bueso - -What are mutexes? ------------------ - -In the Linux kernel, mutexes refer to a particular locking primitive -that enforces serialization on shared memory systems, and not only to -the generic term referring to 'mutual exclusion' found in academia -or similar theoretical text books. Mutexes are sleeping locks which -behave similarly to binary semaphores, and were introduced in 2006[1] -as an alternative to these. This new data structure provided a number -of advantages, including simpler interfaces, and at that time smaller -code (see Disadvantages). 
-
-[1] http://lwn.net/Articles/164802/
-
-Implementation
---------------
-
-Mutexes are represented by 'struct mutex', defined in include/linux/mutex.h
-and implemented in kernel/locking/mutex.c. These locks use a three
-state atomic counter (->count) to represent the different possible
-transitions that can occur during the lifetime of a lock:
-
-	  1: unlocked
-	  0: locked, no waiters
-   negative: locked, with potential waiters
-
-In its most basic form it also includes a wait-queue and a spinlock
-that serializes access to it. CONFIG_SMP systems can also include
-a pointer to the lock task owner (->owner) as well as a spinner MCS
-lock (->osq), both described below in (ii).
-
-When acquiring a mutex, there are three possible paths that can be
-taken, depending on the state of the lock:
-
-(i) fastpath: tries to atomically acquire the lock by decrementing the
-    counter. If it was already taken by another task it goes to the next
-    possible path. This logic is architecture specific. On x86-64, the
-    locking fastpath is 2 instructions:
-
-    0000000000000e10 <mutex_lock>:
-    e21:   f0 ff 0b       lock decl (%rbx)
-    e24:   79 08          jns    e2e <mutex_lock+0x1e>
-
-    the unlocking fastpath is equally tight:
-
-    0000000000000bc0 <mutex_unlock>:
-    bc8:   f0 ff 07       lock incl (%rdi)
-    bcb:   7f 0a          jg     bd7 <mutex_unlock+0x17>
-
-
-(ii) midpath: aka optimistic spinning, tries to spin for acquisition
-     while the lock owner is running and there are no other tasks ready
-     to run that have higher priority (need_resched). The rationale is
-     that if the lock owner is running, it is likely to release the lock
-     soon. The mutex spinners are queued up using MCS lock so that only
-     one spinner can compete for the mutex.
-
-     The MCS lock (proposed by Mellor-Crummey and Scott) is a simple spinlock
-     with the desirable properties of being fair and with each cpu trying
-     to acquire the lock spinning on a local variable. It avoids expensive
-     cacheline bouncing that common test-and-set spinlock implementations
-     incur. An MCS-like lock is specially tailored for optimistic spinning
-     for sleeping lock implementation. An important feature of the customized
-     MCS lock is that it has the extra property that spinners are able to exit
-     the MCS spinlock queue when they need to reschedule. This further helps
-     avoid situations where MCS spinners that need to reschedule would continue
-     waiting to spin on mutex owner, only to go directly to slowpath upon
-     obtaining the MCS lock.
-
-
-(iii) slowpath: last resort, if the lock is still unable to be acquired,
-      the task is added to the wait-queue and sleeps until woken up by the
-      unlock path. Under normal circumstances it blocks as TASK_UNINTERRUPTIBLE.
-
-While formally kernel mutexes are sleepable locks, it is path (ii) that
-makes them more practically a hybrid type. By simply not interrupting a
-task and busy-waiting for a few cycles instead of immediately sleeping,
-the performance of this lock has been seen to significantly improve a
-number of workloads. Note that this technique is also used for rw-semaphores.
-
-Semantics
----------
-
-The mutex subsystem checks and enforces the following rules:
-
-    - Only one task can hold the mutex at a time.
-    - Only the owner can unlock the mutex.
-    - Multiple unlocks are not permitted.
-    - Recursive locking/unlocking is not permitted.
-    - A mutex must only be initialized via the API (see below).
-    - A task may not exit with a mutex held.
-    - Memory areas where held locks reside must not be freed.
-    - Held mutexes must not be reinitialized.
- - Mutexes may not be used in hardware or software interrupt - contexts such as tasklets and timers. - -These semantics are fully enforced when CONFIG DEBUG_MUTEXES is enabled. -In addition, the mutex debugging code also implements a number of other -features that make lock debugging easier and faster: - - - Uses symbolic names of mutexes, whenever they are printed - in debug output. - - Point-of-acquire tracking, symbolic lookup of function names, - list of all locks held in the system, printout of them. - - Owner tracking. - - Detects self-recursing locks and prints out all relevant info. - - Detects multi-task circular deadlocks and prints out all affected - locks and tasks (and only those tasks). - - -Interfaces ----------- -Statically define the mutex: - DEFINE_MUTEX(name); - -Dynamically initialize the mutex: - mutex_init(mutex); - -Acquire the mutex, uninterruptible: - void mutex_lock(struct mutex *lock); - void mutex_lock_nested(struct mutex *lock, unsigned int subclass); - int mutex_trylock(struct mutex *lock); - -Acquire the mutex, interruptible: - int mutex_lock_interruptible_nested(struct mutex *lock, - unsigned int subclass); - int mutex_lock_interruptible(struct mutex *lock); - -Acquire the mutex, interruptible, if dec to 0: - int atomic_dec_and_mutex_lock(atomic_t *cnt, struct mutex *lock); - -Unlock the mutex: - void mutex_unlock(struct mutex *lock); - -Test if the mutex is taken: - int mutex_is_locked(struct mutex *lock); - -Disadvantages -------------- - -Unlike its original design and purpose, 'struct mutex' is larger than -most locks in the kernel. E.g: on x86-64 it is 40 bytes, almost twice -as large as 'struct semaphore' (24 bytes) and 8 bytes shy of the -'struct rw_semaphore' variant. Larger structure sizes mean more CPU -cache and memory footprint. - -When to use mutexes -------------------- - -Unless the strict semantics of mutexes are unsuitable and/or the critical -region prevents the lock from being shared, always prefer them to any other -locking primitive. diff --git a/Documentation/rt-mutex-design.txt b/Documentation/rt-mutex-design.txt deleted file mode 100644 index 8666070d3189..000000000000 --- a/Documentation/rt-mutex-design.txt +++ /dev/null @@ -1,781 +0,0 @@ -# -# Copyright (c) 2006 Steven Rostedt -# Licensed under the GNU Free Documentation License, Version 1.2 -# - -RT-mutex implementation design ------------------------------- - -This document tries to describe the design of the rtmutex.c implementation. -It doesn't describe the reasons why rtmutex.c exists. For that please see -Documentation/rt-mutex.txt. Although this document does explain problems -that happen without this code, but that is in the concept to understand -what the code actually is doing. - -The goal of this document is to help others understand the priority -inheritance (PI) algorithm that is used, as well as reasons for the -decisions that were made to implement PI in the manner that was done. - - -Unbounded Priority Inversion ----------------------------- - -Priority inversion is when a lower priority process executes while a higher -priority process wants to run. This happens for several reasons, and -most of the time it can't be helped. Anytime a high priority process wants -to use a resource that a lower priority process has (a mutex for example), -the high priority process must wait until the lower priority process is done -with the resource. This is a priority inversion. What we want to prevent -is something called unbounded priority inversion. 
That is when the high -priority process is prevented from running by a lower priority process for -an undetermined amount of time. - -The classic example of unbounded priority inversion is where you have three -processes, let's call them processes A, B, and C, where A is the highest -priority process, C is the lowest, and B is in between. A tries to grab a lock -that C owns and must wait and lets C run to release the lock. But in the -meantime, B executes, and since B is of a higher priority than C, it preempts C, -but by doing so, it is in fact preempting A which is a higher priority process. -Now there's no way of knowing how long A will be sleeping waiting for C -to release the lock, because for all we know, B is a CPU hog and will -never give C a chance to release the lock. This is called unbounded priority -inversion. - -Here's a little ASCII art to show the problem. - - grab lock L1 (owned by C) - | -A ---+ - C preempted by B - | -C +----+ - -B +--------> - B now keeps A from running. - - -Priority Inheritance (PI) -------------------------- - -There are several ways to solve this issue, but other ways are out of scope -for this document. Here we only discuss PI. - -PI is where a process inherits the priority of another process if the other -process blocks on a lock owned by the current process. To make this easier -to understand, let's use the previous example, with processes A, B, and C again. - -This time, when A blocks on the lock owned by C, C would inherit the priority -of A. So now if B becomes runnable, it would not preempt C, since C now has -the high priority of A. As soon as C releases the lock, it loses its -inherited priority, and A then can continue with the resource that C had. - -Terminology ------------ - -Here I explain some terminology that is used in this document to help describe -the design that is used to implement PI. - -PI chain - The PI chain is an ordered series of locks and processes that cause - processes to inherit priorities from a previous process that is - blocked on one of its locks. This is described in more detail - later in this document. - -mutex - In this document, to differentiate from locks that implement - PI and spin locks that are used in the PI code, from now on - the PI locks will be called a mutex. - -lock - In this document from now on, I will use the term lock when - referring to spin locks that are used to protect parts of the PI - algorithm. These locks disable preemption for UP (when - CONFIG_PREEMPT is enabled) and on SMP prevents multiple CPUs from - entering critical sections simultaneously. - -spin lock - Same as lock above. - -waiter - A waiter is a struct that is stored on the stack of a blocked - process. Since the scope of the waiter is within the code for - a process being blocked on the mutex, it is fine to allocate - the waiter on the process's stack (local variable). This - structure holds a pointer to the task, as well as the mutex that - the task is blocked on. It also has the plist node structures to - place the task in the waiter_list of a mutex as well as the - pi_list of a mutex owner task (described below). - - waiter is sometimes used in reference to the task that is waiting - on a mutex. This is the same as waiter->task. - -waiters - A list of processes that are blocked on a mutex. - -top waiter - The highest priority process waiting on a specific mutex. - -top pi waiter - The highest priority process waiting on one of the mutexes - that a specific process owns. 
- -Note: task and process are used interchangeably in this document, mostly to - differentiate between two processes that are being described together. - - -PI chain --------- - -The PI chain is a list of processes and mutexes that may cause priority -inheritance to take place. Multiple chains may converge, but a chain -would never diverge, since a process can't be blocked on more than one -mutex at a time. - -Example: - - Process: A, B, C, D, E - Mutexes: L1, L2, L3, L4 - - A owns: L1 - B blocked on L1 - B owns L2 - C blocked on L2 - C owns L3 - D blocked on L3 - D owns L4 - E blocked on L4 - -The chain would be: - - E->L4->D->L3->C->L2->B->L1->A - -To show where two chains merge, we could add another process F and -another mutex L5 where B owns L5 and F is blocked on mutex L5. - -The chain for F would be: - - F->L5->B->L1->A - -Since a process may own more than one mutex, but never be blocked on more than -one, the chains merge. - -Here we show both chains: - - E->L4->D->L3->C->L2-+ - | - +->B->L1->A - | - F->L5-+ - -For PI to work, the processes at the right end of these chains (or we may -also call it the Top of the chain) must be equal to or higher in priority -than the processes to the left or below in the chain. - -Also since a mutex may have more than one process blocked on it, we can -have multiple chains merge at mutexes. If we add another process G that is -blocked on mutex L2: - - G->L2->B->L1->A - -And once again, to show how this can grow I will show the merging chains -again. - - E->L4->D->L3->C-+ - +->L2-+ - | | - G-+ +->B->L1->A - | - F->L5-+ - - -Plist ------ - -Before I go further and talk about how the PI chain is stored through lists -on both mutexes and processes, I'll explain the plist. This is similar to -the struct list_head functionality that is already in the kernel. -The implementation of plist is out of scope for this document, but it is -very important to understand what it does. - -There are a few differences between plist and list, the most important one -being that plist is a priority sorted linked list. This means that the -priorities of the plist are sorted, such that it takes O(1) to retrieve the -highest priority item in the list. Obviously this is useful to store processes -based on their priorities. - -Another difference, which is important for implementation, is that, unlike -list, the head of the list is a different element than the nodes of a list. -So the head of the list is declared as struct plist_head and nodes that will -be added to the list are declared as struct plist_node. - - -Mutex Waiter List ------------------ - -Every mutex keeps track of all the waiters that are blocked on itself. The mutex -has a plist to store these waiters by priority. This list is protected by -a spin lock that is located in the struct of the mutex. This lock is called -wait_lock. Since the modification of the waiter list is never done in -interrupt context, the wait_lock can be taken without disabling interrupts. - - -Task PI List ------------- - -To keep track of the PI chains, each process has its own PI list. This is -a list of all top waiters of the mutexes that are owned by the process. -Note that this list only holds the top waiters and not all waiters that are -blocked on mutexes owned by the process. - -The top of the task's PI list is always the highest priority task that -is waiting on a mutex that is owned by the task. So if the task has -inherited a priority, it will always be the priority of the task that is -at the top of this list. 
- -This list is stored in the task structure of a process as a plist called -pi_list. This list is protected by a spin lock also in the task structure, -called pi_lock. This lock may also be taken in interrupt context, so when -locking the pi_lock, interrupts must be disabled. - - -Depth of the PI Chain ---------------------- - -The maximum depth of the PI chain is not dynamic, and could actually be -defined. But is very complex to figure it out, since it depends on all -the nesting of mutexes. Let's look at the example where we have 3 mutexes, -L1, L2, and L3, and four separate functions func1, func2, func3 and func4. -The following shows a locking order of L1->L2->L3, but may not actually -be directly nested that way. - -void func1(void) -{ - mutex_lock(L1); - - /* do anything */ - - mutex_unlock(L1); -} - -void func2(void) -{ - mutex_lock(L1); - mutex_lock(L2); - - /* do something */ - - mutex_unlock(L2); - mutex_unlock(L1); -} - -void func3(void) -{ - mutex_lock(L2); - mutex_lock(L3); - - /* do something else */ - - mutex_unlock(L3); - mutex_unlock(L2); -} - -void func4(void) -{ - mutex_lock(L3); - - /* do something again */ - - mutex_unlock(L3); -} - -Now we add 4 processes that run each of these functions separately. -Processes A, B, C, and D which run functions func1, func2, func3 and func4 -respectively, and such that D runs first and A last. With D being preempted -in func4 in the "do something again" area, we have a locking that follows: - -D owns L3 - C blocked on L3 - C owns L2 - B blocked on L2 - B owns L1 - A blocked on L1 - -And thus we have the chain A->L1->B->L2->C->L3->D. - -This gives us a PI depth of 4 (four processes), but looking at any of the -functions individually, it seems as though they only have at most a locking -depth of two. So, although the locking depth is defined at compile time, -it still is very difficult to find the possibilities of that depth. - -Now since mutexes can be defined by user-land applications, we don't want a DOS -type of application that nests large amounts of mutexes to create a large -PI chain, and have the code holding spin locks while looking at a large -amount of data. So to prevent this, the implementation not only implements -a maximum lock depth, but also only holds at most two different locks at a -time, as it walks the PI chain. More about this below. - - -Mutex owner and flags ---------------------- - -The mutex structure contains a pointer to the owner of the mutex. If the -mutex is not owned, this owner is set to NULL. Since all architectures -have the task structure on at least a four byte alignment (and if this is -not true, the rtmutex.c code will be broken!), this allows for the two -least significant bits to be used as flags. This part is also described -in Documentation/rt-mutex.txt, but will also be briefly described here. - -Bit 0 is used as the "Pending Owner" flag. This is described later. -Bit 1 is used as the "Has Waiters" flags. This is also described later - in more detail, but is set whenever there are waiters on a mutex. - - -cmpxchg Tricks --------------- - -Some architectures implement an atomic cmpxchg (Compare and Exchange). This -is used (when applicable) to keep the fast path of grabbing and releasing -mutexes short. 
- -cmpxchg is basically the following function performed atomically: - -unsigned long _cmpxchg(unsigned long *A, unsigned long *B, unsigned long *C) -{ - unsigned long T = *A; - if (*A == *B) { - *A = *C; - } - return T; -} -#define cmpxchg(a,b,c) _cmpxchg(&a,&b,&c) - -This is really nice to have, since it allows you to only update a variable -if the variable is what you expect it to be. You know if it succeeded if -the return value (the old value of A) is equal to B. - -The macro rt_mutex_cmpxchg is used to try to lock and unlock mutexes. If -the architecture does not support CMPXCHG, then this macro is simply set -to fail every time. But if CMPXCHG is supported, then this will -help out extremely to keep the fast path short. - -The use of rt_mutex_cmpxchg with the flags in the owner field help optimize -the system for architectures that support it. This will also be explained -later in this document. - - -Priority adjustments --------------------- - -The implementation of the PI code in rtmutex.c has several places that a -process must adjust its priority. With the help of the pi_list of a -process this is rather easy to know what needs to be adjusted. - -The functions implementing the task adjustments are rt_mutex_adjust_prio, -__rt_mutex_adjust_prio (same as the former, but expects the task pi_lock -to already be taken), rt_mutex_getprio, and rt_mutex_setprio. - -rt_mutex_getprio and rt_mutex_setprio are only used in __rt_mutex_adjust_prio. - -rt_mutex_getprio returns the priority that the task should have. Either the -task's own normal priority, or if a process of a higher priority is waiting on -a mutex owned by the task, then that higher priority should be returned. -Since the pi_list of a task holds an order by priority list of all the top -waiters of all the mutexes that the task owns, rt_mutex_getprio simply needs -to compare the top pi waiter to its own normal priority, and return the higher -priority back. - -(Note: if looking at the code, you will notice that the lower number of - prio is returned. This is because the prio field in the task structure - is an inverse order of the actual priority. So a "prio" of 5 is - of higher priority than a "prio" of 10.) - -__rt_mutex_adjust_prio examines the result of rt_mutex_getprio, and if the -result does not equal the task's current priority, then rt_mutex_setprio -is called to adjust the priority of the task to the new priority. -Note that rt_mutex_setprio is defined in kernel/sched/core.c to implement the -actual change in priority. - -It is interesting to note that __rt_mutex_adjust_prio can either increase -or decrease the priority of the task. In the case that a higher priority -process has just blocked on a mutex owned by the task, __rt_mutex_adjust_prio -would increase/boost the task's priority. But if a higher priority task -were for some reason to leave the mutex (timeout or signal), this same function -would decrease/unboost the priority of the task. That is because the pi_list -always contains the highest priority task that is waiting on a mutex owned -by the task, so we only need to compare the priority of that top pi waiter -to the normal priority of the given task. - - -High level overview of the PI chain walk ----------------------------------------- - -The PI chain walk is implemented by the function rt_mutex_adjust_prio_chain. - -The implementation has gone through several iterations, and has ended up -with what we believe is the best. 
It walks the PI chain by only grabbing -at most two locks at a time, and is very efficient. - -The rt_mutex_adjust_prio_chain can be used either to boost or lower process -priorities. - -rt_mutex_adjust_prio_chain is called with a task to be checked for PI -(de)boosting (the owner of a mutex that a process is blocking on), a flag to -check for deadlocking, the mutex that the task owns, and a pointer to a waiter -that is the process's waiter struct that is blocked on the mutex (although this -parameter may be NULL for deboosting). - -For this explanation, I will not mention deadlock detection. This explanation -will try to stay at a high level. - -When this function is called, there are no locks held. That also means -that the state of the owner and lock can change when entered into this function. - -Before this function is called, the task has already had rt_mutex_adjust_prio -performed on it. This means that the task is set to the priority that it -should be at, but the plist nodes of the task's waiter have not been updated -with the new priorities, and that this task may not be in the proper locations -in the pi_lists and wait_lists that the task is blocked on. This function -solves all that. - -A loop is entered, where task is the owner to be checked for PI changes that -was passed by parameter (for the first iteration). The pi_lock of this task is -taken to prevent any more changes to the pi_list of the task. This also -prevents new tasks from completing the blocking on a mutex that is owned by this -task. - -If the task is not blocked on a mutex then the loop is exited. We are at -the top of the PI chain. - -A check is now done to see if the original waiter (the process that is blocked -on the current mutex) is the top pi waiter of the task. That is, is this -waiter on the top of the task's pi_list. If it is not, it either means that -there is another process higher in priority that is blocked on one of the -mutexes that the task owns, or that the waiter has just woken up via a signal -or timeout and has left the PI chain. In either case, the loop is exited, since -we don't need to do any more changes to the priority of the current task, or any -task that owns a mutex that this current task is waiting on. A priority chain -walk is only needed when a new top pi waiter is made to a task. - -The next check sees if the task's waiter plist node has the priority equal to -the priority the task is set at. If they are equal, then we are done with -the loop. Remember that the function started with the priority of the -task adjusted, but the plist nodes that hold the task in other processes -pi_lists have not been adjusted. - -Next, we look at the mutex that the task is blocked on. The mutex's wait_lock -is taken. This is done by a spin_trylock, because the locking order of the -pi_lock and wait_lock goes in the opposite direction. If we fail to grab the -lock, the pi_lock is released, and we restart the loop. - -Now that we have both the pi_lock of the task as well as the wait_lock of -the mutex the task is blocked on, we update the task's waiter's plist node -that is located on the mutex's wait_list. - -Now we release the pi_lock of the task. - -Next the owner of the mutex has its pi_lock taken, so we can update the -task's entry in the owner's pi_list. If the task is the highest priority -process on the mutex's wait_list, then we remove the previous top waiter -from the owner's pi_list, and replace it with the task. 
- -Note: It is possible that the task was the current top waiter on the mutex, - in which case the task is not yet on the pi_list of the waiter. This - is OK, since plist_del does nothing if the plist node is not on any - list. - -If the task was not the top waiter of the mutex, but it was before we -did the priority updates, that means we are deboosting/lowering the -task. In this case, the task is removed from the pi_list of the owner, -and the new top waiter is added. - -Lastly, we unlock both the pi_lock of the task, as well as the mutex's -wait_lock, and continue the loop again. On the next iteration of the -loop, the previous owner of the mutex will be the task that will be -processed. - -Note: One might think that the owner of this mutex might have changed - since we just grab the mutex's wait_lock. And one could be right. - The important thing to remember is that the owner could not have - become the task that is being processed in the PI chain, since - we have taken that task's pi_lock at the beginning of the loop. - So as long as there is an owner of this mutex that is not the same - process as the tasked being worked on, we are OK. - - Looking closely at the code, one might be confused. The check for the - end of the PI chain is when the task isn't blocked on anything or the - task's waiter structure "task" element is NULL. This check is - protected only by the task's pi_lock. But the code to unlock the mutex - sets the task's waiter structure "task" element to NULL with only - the protection of the mutex's wait_lock, which was not taken yet. - Isn't this a race condition if the task becomes the new owner? - - The answer is No! The trick is the spin_trylock of the mutex's - wait_lock. If we fail that lock, we release the pi_lock of the - task and continue the loop, doing the end of PI chain check again. - - In the code to release the lock, the wait_lock of the mutex is held - the entire time, and it is not let go when we grab the pi_lock of the - new owner of the mutex. So if the switch of a new owner were to happen - after the check for end of the PI chain and the grabbing of the - wait_lock, the unlocking code would spin on the new owner's pi_lock - but never give up the wait_lock. So the PI chain loop is guaranteed to - fail the spin_trylock on the wait_lock, release the pi_lock, and - try again. - - If you don't quite understand the above, that's OK. You don't have to, - unless you really want to make a proof out of it ;) - - -Pending Owners and Lock stealing --------------------------------- - -One of the flags in the owner field of the mutex structure is "Pending Owner". -What this means is that an owner was chosen by the process releasing the -mutex, but that owner has yet to wake up and actually take the mutex. - -Why is this important? Why can't we just give the mutex to another process -and be done with it? - -The PI code is to help with real-time processes, and to let the highest -priority process run as long as possible with little latencies and delays. -If a high priority process owns a mutex that a lower priority process is -blocked on, when the mutex is released it would be given to the lower priority -process. What if the higher priority process wants to take that mutex again. -The high priority process would fail to take that mutex that it just gave up -and it would need to boost the lower priority process to run with full -latency of that critical section (since the low priority process just entered -it). 
-
-There's no reason a high priority process that gives up a mutex should be
-penalized if it tries to take that mutex again. If the new owner of the
-mutex has not woken up yet, there's no reason that the higher priority process
-could not take that mutex away.
-
-To solve this, we introduced Pending Ownership and Lock Stealing. When a
-new process is given a mutex that it was blocked on, it is only given
-pending ownership. This means that it's the new owner, unless a higher
-priority process comes in and tries to grab that mutex. If a higher priority
-process does come along and wants that mutex, we let the higher priority
-process "steal" the mutex from the pending owner (only if it is still pending)
-and continue with the mutex.
-
-
-Taking of a mutex (The walk through)
-------------------------------------
-
-OK, now let's take a look at the detailed walk through of what happens when
-taking a mutex.
-
-The first thing that is tried is the fast taking of the mutex. This is
-done when we have CMPXCHG enabled (otherwise the fast taking automatically
-fails). Only when the owner field of the mutex is NULL can the lock be
-taken with the CMPXCHG and nothing else needs to be done.
-
-If there is contention on the lock, whether it has a real owner or only a
-pending owner, we take the slow path (rt_mutex_slowlock).
-
-The slow path function is where the task's waiter structure is created on
-the stack. This is because the waiter structure is only needed for the
-scope of this function. The waiter structure holds the nodes to store
-the task on the wait_list of the mutex, and if need be, the pi_list of
-the owner.
-
-The wait_lock of the mutex is taken since the slow path of unlocking the
-mutex also takes this lock.
-
-We then call try_to_take_rt_mutex. This is where an architecture that
-does not implement CMPXCHG would always grab the lock (if there's no
-contention).
-
-try_to_take_rt_mutex is used every time the task tries to grab a mutex in the
-slow path. The first thing that is done here is an atomic setting of
-the "Has Waiters" flag of the mutex's owner field. Yes, this could really
-be false, because if the mutex has no owner, there are no waiters and
-the current task also won't have any waiters. But we don't have the lock
-yet, so we assume we are going to be a waiter. The reason for this is to
-play nice for those architectures that do have CMPXCHG. By setting this flag
-now, the owner of the mutex can't release the mutex without going into the
-slow unlock path, and it would then need to grab the wait_lock, which this
-code currently holds. So setting the "Has Waiters" flag forces the owner
-to synchronize with this code.
-
-Now that we know that we can't have any races with the owner releasing the
-mutex, we check to see if we can take the ownership. This is done if the
-mutex doesn't have an owner, or if we can steal the mutex from a pending
-owner. Let's look at the situations we have here.
-
-  1) Has owner that is pending
-  ----------------------------
-
-  The mutex has an owner, but that owner hasn't woken up and the mutex flag
-  "Pending Owner" is set. The first check is to see if the owner isn't the
-  current task. This is because this function is also used for the pending
-  owner to grab the mutex. When a pending owner wakes up, it checks to see
-  if it can take the mutex, and this is done if the owner is already set to
-  itself. If so, we succeed and leave the function, clearing the "Pending
-  Owner" bit.
-
-  If the pending owner is not current, we check to see if the current
-  process's priority is higher than the pending owner's. If not, we fail
-  the function and return.
-
-  There's also something special about a pending owner. That is, a pending
-  owner is never blocked on a mutex. So there is no PI chain to worry about.
-  It also means that if the mutex doesn't have any waiters, there's no
-  accounting needed to update the pending owner's pi_list, since we only
-  worry about processes blocked on the current mutex.
-
-  If there are waiters on this mutex, and we just stole the ownership, we need
-  to take the top waiter, remove it from the pi_list of the pending owner, and
-  add it to the current process's pi_list. Note that at this moment, the
-  pending owner is no longer on the list of waiters. This is fine, since the
-  pending owner would add itself back when it realizes that it had the
-  ownership stolen from it. When the pending owner tries to grab the mutex,
-  it will fail in try_to_take_rt_mutex if the owner field points to another
-  process.
-
-  2) No owner
-  -----------
-
-  If there is no owner (or we successfully stole the lock), we set the owner
-  of the mutex to current, and set the flag of "Has Waiters" if the current
-  mutex actually has waiters, or we clear the flag if it doesn't. See, it was
-  OK that we set that flag early, since now it is cleared.
-
-  3) Failed to grab ownership
-  ---------------------------
-
-  The most interesting case is when we fail to take ownership. This means that
-  there exists an owner, or there's a pending owner with equal or higher
-  priority than the current task.
-
-We'll continue on the failed case.
-
-If the mutex has a timeout, we set up a timer to go off to break us out
-of this mutex if we failed to get it after a specified amount of time.
-
-Now we enter a loop that will continue to try to take ownership of the mutex,
-or fail from a timeout or signal.
-
-Once again we try to take the mutex. This will usually fail the first time
-in the loop, since it had just failed to get the mutex. But the second time
-in the loop, this would likely succeed, since the task would likely be
-the pending owner.
-
-If the task's state is TASK_INTERRUPTIBLE, a check for signals and timeout
-is done here.
-
-The waiter structure has a "task" field that points to the task that is blocked
-on the mutex. This field can be NULL the first time it goes through the loop
-or if the task is a pending owner and had its mutex stolen. If the "task"
-field is NULL then we need to set up the accounting for it.
-
-Task blocks on mutex
---------------------
-
-The accounting of a mutex and process is done with the waiter structure of
-the process. The "task" field is set to the process, and the "lock" field
-to the mutex. The plist nodes are initialized to the process's current
-priority.
-
-Since the wait_lock was taken at the entry of the slow lock, we can safely
-add the waiter to the wait_list. If the current process is the highest
-priority process currently waiting on this mutex, then we remove the
-previous top waiter process (if it exists) from the pi_list of the owner,
-and add the current process to that list. Since the pi_list of the owner
-has changed, we call rt_mutex_adjust_prio on the owner to see if the owner
-should adjust its priority accordingly. A sketch of this accounting is
-shown below.
-
-If the owner is also blocked on a lock, and had its pi_list changed
-(or deadlock checking is on), we unlock the wait_lock of the mutex and go ahead
-and run rt_mutex_adjust_prio_chain on the owner, as described earlier.
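A condensed sketch of that accounting, assuming it is entered with the
mutex's wait_lock already held. The names approximate the real
task_blocks_on_rt_mutex(), and prev_top_waiter is a made-up name for the
waiter that was on top before we were queued:

	waiter->task = current;
	waiter->lock = lock;
	plist_node_init(&waiter->list_entry, current->prio);
	plist_node_init(&waiter->pi_list_entry, current->prio);

	spin_lock(&current->pi_lock);
	current->pi_blocked_on = waiter;
	plist_add(&waiter->list_entry, &lock->wait_list);
	spin_unlock(&current->pi_lock);

	if (waiter == rt_mutex_top_waiter(lock)) {
		/* we displaced the previous top waiter */
		struct task_struct *owner = rt_mutex_owner(lock);

		spin_lock(&owner->pi_lock);
		plist_del(&prev_top_waiter->pi_list_entry, &owner->pi_waiters);
		plist_add(&waiter->pi_list_entry, &owner->pi_waiters);
		/* recompute the owner's priority (rt_mutex_adjust_prio) */
		__rt_mutex_adjust_prio(owner);
		spin_unlock(&owner->pi_lock);
	}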
-
-Now all locks are released, and if the current process is still blocked on a
-mutex (waiter "task" field is not NULL), then we go to sleep (call schedule).
-
-Waking up in the loop
----------------------
-
-The call to schedule can then wake up for a few reasons:
-  1) we were given pending ownership of the mutex.
-  2) we received a signal and were TASK_INTERRUPTIBLE
-  3) we had a timeout and were TASK_INTERRUPTIBLE
-
-In any of these cases, we continue the loop and once again try to grab the
-ownership of the mutex. If we succeed, we exit the loop. Otherwise, a signal
-or timeout will exit the loop, while if we had the mutex stolen we simply add
-ourselves back on the lists and go back to sleep.
-
-Note: For various reasons, because of timeout and signals, the steal mutex
-      algorithm needs to be careful. This is because the current process is
-      still on the wait_list. And because of dynamic changing of priorities,
-      especially on SCHED_OTHER tasks, the current process can be the
-      highest priority task on the wait_list.
-
-Failed to get mutex on Timeout or Signal
-----------------------------------------
-
-If a timeout or signal occurred, the waiter's "task" field would not be
-NULL and the task needs to be taken off the wait_list of the mutex and perhaps
-the pi_list of the owner. If this process was a high priority process, then
-the rt_mutex_adjust_prio_chain needs to be executed again on the owner,
-but this time it will be lowering the priorities.
-
-
-Unlocking the Mutex
--------------------
-
-The unlocking of a mutex also has a fast path for those architectures with
-CMPXCHG. Since the taking of a mutex on contention always sets the
-"Has Waiters" flag of the mutex's owner field, we use this to know if we need
-to take the slow path when unlocking the mutex. If the mutex doesn't have any
-waiters, the owner field of the mutex would equal the current process and
-the mutex can be unlocked by just replacing the owner field with NULL.
-
-If the owner field has the "Has Waiters" bit set (or CMPXCHG is not available),
-the slow unlock path is taken.
-
-The first thing done in the slow unlock path is to take the wait_lock of the
-mutex. This synchronizes the locking and unlocking of the mutex.
-
-A check is made to see if the mutex has waiters or not. On architectures that
-do not have CMPXCHG, this is the location where the owner of the mutex will
-determine if a waiter needs to be awoken or not. On architectures that
-do have CMPXCHG, that check is done in the fast path, but it is still needed
-in the slow path too. If a waiter of a mutex woke up because of a signal
-or timeout between the time the owner failed the fast path CMPXCHG check and
-the grabbing of the wait_lock, the mutex may not have any waiters, thus the
-owner still needs to make this check. If there are no waiters then the mutex
-owner field is set to NULL, the wait_lock is released and nothing more is
-needed.
-
-If there are waiters, then we need to wake one up and give that waiter
-pending ownership. A sketch of this hand-over is shown below.
-
-In the wake up code, the pi_lock of the current owner is taken. The top
-waiter of the lock is found and removed from the wait_list of the mutex
-as well as the pi_list of the current owner. The task field of the new
-pending owner's waiter structure is set to NULL, and the owner field of the
-mutex is set to the new owner with the "Pending Owner" bit set, as well
-as the "Has Waiters" bit if there still are other processes blocked on the
-mutex.
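A simplified sketch of that hand-over, assuming it is entered with the
mutex's wait_lock held; RT_MUTEX_OWNER_PENDING and RT_MUTEX_HAS_WAITERS are
stand-ins for the two owner-field bits, and the helper names approximate the
real slow unlock code:

	spin_lock(&current->pi_lock);	/* current == releasing owner */

	waiter = rt_mutex_top_waiter(lock);
	pendowner = waiter->task;

	plist_del(&waiter->list_entry, &lock->wait_list);
	plist_del(&waiter->pi_list_entry, &current->pi_waiters);
	waiter->task = NULL;	/* marks the waiter as the pending owner */

	/* hand the lock over with the "Pending Owner" bit set, keeping
	 * "Has Waiters" if other tasks are still blocked on the mutex */
	val = (unsigned long)pendowner | RT_MUTEX_OWNER_PENDING;
	if (rt_mutex_has_waiters(lock))
		val |= RT_MUTEX_HAS_WAITERS;
	lock->owner = (struct task_struct *)val;

	/* the previous owner's pi_lock is dropped in the next step */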
-
-The pi_lock of the previous owner is released, and the new pending owner's
-pi_lock is taken. Remember that this is the trick to prevent the race
-condition in rt_mutex_adjust_prio_chain from adding itself as a waiter
-on the mutex.
-
-We now clear the "pi_blocked_on" field of the new pending owner, and if
-the mutex still has waiters pending, we add the new top waiter to the pi_list
-of the pending owner.
-
-Finally we unlock the pi_lock of the pending owner and wake it up.
-
-
-Contact
--------
-
-For updates on this document, please email Steven Rostedt
-
-
-Credits
--------
-
-Author: Steven Rostedt
-
-Reviewers: Ingo Molnar, Thomas Gleixner, Thomas Duetsch, and Randy Dunlap
-
-Updates
--------
-
-This document was originally written for 2.6.17-rc3-mm1
diff --git a/Documentation/rt-mutex.txt b/Documentation/rt-mutex.txt
deleted file mode 100644
index 243393d882ee..000000000000
--- a/Documentation/rt-mutex.txt
+++ /dev/null
@@ -1,79 +0,0 @@
-RT-mutex subsystem with PI support
-----------------------------------
-
-RT-mutexes with priority inheritance are used to support PI-futexes,
-which enable pthread_mutex_t priority inheritance attributes
-(PTHREAD_PRIO_INHERIT). [See Documentation/pi-futex.txt for more details
-about PI-futexes.]
-
-This technology was developed in the -rt tree and streamlined for
-pthread_mutex support.
-
-Basic principles:
------------------
-
-RT-mutexes extend the semantics of simple mutexes by the priority
-inheritance protocol.
-
-A low priority owner of a rt-mutex inherits the priority of a higher
-priority waiter until the rt-mutex is released. If the temporarily
-boosted owner blocks on a rt-mutex itself it propagates the priority
-boosting to the owner of the other rt_mutex it gets blocked on. The
-priority boosting is immediately removed once the rt_mutex has been
-unlocked.
-
-This approach allows us to shorten the time high-prio tasks block on
-mutexes which protect shared resources. Priority inheritance is not a
-magic bullet for poorly designed applications, but it allows
-well-designed applications to use userspace locks in critical parts of
-a high priority thread, without losing determinism.
-
-The enqueueing of the waiters into the rtmutex waiter list is done in
-priority order. For same priorities FIFO order is chosen. For each
-rtmutex, only the top priority waiter is enqueued into the owner's
-priority waiters list. This list too queues in priority order. Whenever
-the top priority waiter of a task changes (for example it timed out or
-got a signal), the priority of the owner task is readjusted. [The
-priority enqueueing is handled by "plists", see include/linux/plist.h
-for more details.]
-
-RT-mutexes are optimized for fastpath operations and have no internal
-locking overhead when locking an uncontended mutex or unlocking a mutex
-without waiters. The optimized fastpath operations require cmpxchg
-support. [If that is not available then the rt-mutex internal spinlock
-is used]
-
-The state of the rt-mutex is tracked via the owner field of the rt-mutex
-structure:
-
-rt_mutex->owner holds the task_struct pointer of the owner. Bit 0 and 1
-are used to keep track of the "owner is pending" and "rtmutex has
-waiters" state.
-
-          owner        bit1    bit0
-          NULL         0       0       mutex is free (fast acquire possible)
-          NULL         0       1       invalid state
-          NULL         1       0       Transitional state*
-          NULL         1       1       invalid state
-          taskpointer  0       0       mutex is held (fast release possible)
-          taskpointer  0       1       task is pending owner
-          taskpointer  1       0       mutex is held and has waiters
-          taskpointer  1       1       task is pending owner and mutex has waiters
-
-Pending-ownership handling is a performance optimization:
-pending-ownership is assigned to the first (highest priority) waiter of
-the mutex, when the mutex is released. The thread is woken up and once
-it starts executing it can acquire the mutex. Until the mutex is taken
-by it (bit 0 is cleared) a competing higher priority thread can "steal"
-the mutex which puts the woken up thread back on the waiters list.
-
-The pending-ownership optimization is especially important for the
-uninterrupted workflow of high-prio tasks which repeatedly
-take/release locks that have lower-prio waiters. Without this
-optimization the higher-prio thread would ping-pong to the lower-prio
-task [because at unlock time we always assign a new owner].
-
-(*) The "mutex has waiters" bit gets set to take the lock. If the lock
-doesn't already have an owner, this bit is quickly cleared if there are
-no waiters. So this is a transitional state to synchronize with looking
-at the owner field of the mutex and the mutex owner releasing the lock.
diff --git a/Documentation/spinlocks.txt b/Documentation/spinlocks.txt
deleted file mode 100644
index 97eaf5727178..000000000000
--- a/Documentation/spinlocks.txt
+++ /dev/null
@@ -1,167 +0,0 @@
-Lesson 1: Spin locks
-
-The most basic primitive for locking is the spinlock.
-
-static DEFINE_SPINLOCK(xxx_lock);
-
-	unsigned long flags;
-
-	spin_lock_irqsave(&xxx_lock, flags);
-	... critical section here ..
-	spin_unlock_irqrestore(&xxx_lock, flags);
-
-The above is always safe. It will disable interrupts _locally_, but the
-spinlock itself will guarantee the global lock, so it will guarantee that
-there is only one thread-of-control within the region(s) protected by that
-lock. This works well even under UP, so the code does _not_ need to
-worry about UP vs SMP issues: the spinlocks work correctly under both.
-
-   NOTE! Implications of spin_locks for memory are further described in:
-
-     Documentation/memory-barriers.txt
-       (5) LOCK operations.
-       (6) UNLOCK operations.
-
-The above is usually pretty simple (you usually need and want only one
-spinlock for most things - using more than one spinlock can make things a
-lot more complex and even slower and is usually worth it only for
-sequences that you _know_ need to be split up: avoid it at all cost if you
-aren't sure).
-
-This is really the only hard part about spinlocks: once you start
-using spinlocks they tend to expand to areas you might not have noticed
-before, because you have to make sure the spinlocks correctly protect the
-shared data structures _everywhere_ they are used. The spinlocks are most
-easily added to places that are completely independent of other code (for
-example, internal driver data structures that nobody else ever touches).
-
-   NOTE! The spin-lock is safe only when you _also_ use the lock itself
-   to do locking across CPU's, which implies that EVERYTHING that
-   touches a shared variable has to agree about the spinlock they want
-   to use.
-
-----
-
-Lesson 2: reader-writer spinlocks.
-
-If your data accesses have a very natural pattern where you usually tend
-to mostly read from the shared variables, the reader-writer lock
-(rw_lock) versions of the spinlocks are sometimes useful. They allow multiple
-readers to be in the same critical region at once, but if somebody wants
-to change the variables they have to get an exclusive write lock.
-
-   NOTE! reader-writer locks require more atomic memory operations than
-   simple spinlocks. Unless the reader critical section is long, you
-   are better off just using spinlocks.
-
-The routines look the same as above:
-
-   rwlock_t xxx_lock = __RW_LOCK_UNLOCKED(xxx_lock);
-
-	unsigned long flags;
-
-	read_lock_irqsave(&xxx_lock, flags);
-	.. critical section that only reads the info ...
-	read_unlock_irqrestore(&xxx_lock, flags);
-
-	write_lock_irqsave(&xxx_lock, flags);
-	.. read and write exclusive access to the info ...
-	write_unlock_irqrestore(&xxx_lock, flags);
-
-The above kind of lock may be useful for complex data structures like
-linked lists, especially searching for entries without changing the list
-itself. The read lock allows many concurrent readers. Anything that
-_changes_ the list will have to get the write lock.
-
-   NOTE! RCU is better for list traversal, but requires careful
-   attention to design detail (see Documentation/RCU/listRCU.txt).
-
-Also, you cannot "upgrade" a read-lock to a write-lock, so if you at _any_
-time need to do any changes (even if you don't do it every time), you have
-to get the write-lock at the very beginning.
-
-   NOTE! We are working hard to remove reader-writer spinlocks in most
-   cases, so please don't add a new one without consensus. (Instead, see
-   Documentation/RCU/rcu.txt for complete information.)
-
-----
-
-Lesson 3: spinlocks revisited.
-
-The single spin-lock primitives above are by no means the only ones. They
-are the safest ones, and the ones that work under all circumstances,
-but partly _because_ they are safe they are also fairly slow. They are slower
-than they'd need to be, because they do have to disable interrupts
-(which is just a single instruction on x86, but it's an expensive one -
-and on other architectures it can be worse).
-
-If you have a case where you have to protect a data structure across
-several CPU's and you want to use spinlocks you can potentially use
-cheaper versions of the spinlocks. IFF you know that the spinlocks are
-never used in interrupt handlers, you can use the non-irq versions:
-
-	spin_lock(&lock);
-	...
-	spin_unlock(&lock);
-
-(and the equivalent read-write versions too, of course). The spinlock will
-guarantee the same kind of exclusive access, and it will be much faster.
-This is useful if you know that the data in question is only ever
-manipulated from a "process context", ie no interrupts involved.
-
-The reason you mustn't use these versions if you have interrupts that
-play with the spinlock is that you can get deadlocks:
-
-	spin_lock(&lock);
-	...
-		<- interrupt comes in:
-			spin_lock(&lock);
-
-where an interrupt tries to lock an already locked variable. This is ok if
-the interrupt happens on another CPU, but it is _not_ ok if the
-interrupt happens on the same CPU that already holds the lock, because the
-lock will obviously never be released (because the interrupt is waiting
-for the lock, and the lock-holder is interrupted by the interrupt and will
-not continue until the interrupt has been processed). A short sketch of
-the safe pattern follows.
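As a minimal sketch of the safe pattern - assuming a hypothetical driver
whose interrupt handler and process-context code share one piece of data;
the names xxx_irq and xxx_do_work are made up for the example:

	#include <linux/spinlock.h>
	#include <linux/interrupt.h>

	static DEFINE_SPINLOCK(xxx_lock);

	static irqreturn_t xxx_irq(int irq, void *dev_id)
	{
		spin_lock(&xxx_lock);	/* interrupts already off here */
		/* ... touch the shared data ... */
		spin_unlock(&xxx_lock);
		return IRQ_HANDLED;
	}

	static void xxx_do_work(void)
	{
		unsigned long flags;

		/* process context: disable local interrupts, or xxx_irq
		 * could come in while we hold the lock and deadlock */
		spin_lock_irqsave(&xxx_lock, flags);
		/* ... touch the same shared data ... */
		spin_unlock_irqrestore(&xxx_lock, flags);
	}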
-
-(This is also the reason why the irq-versions of the spinlocks only need
-to disable the _local_ interrupts - it's ok to use spinlocks in interrupts
-on other CPU's, because an interrupt on another CPU doesn't interrupt the
-CPU that holds the lock, so the lock-holder can continue and eventually
-release the lock).
-
-Note that you can be clever with read-write locks and interrupts. For
-example, if you know that the interrupt only ever gets a read-lock, then
-you can use a non-irq version of read locks everywhere - because they
-don't block on each other (and thus there is no dead-lock wrt interrupts).
-But when you do the write-lock, you have to use the irq-safe version.
-
-For an example of being clever with rw-locks, see the "waitqueue_lock"
-handling in kernel/sched/core.c - nothing ever _changes_ a wait-queue from
-within an interrupt, they only read the queue in order to know whom to
-wake up. So read-locks are safe (which is good: they are very common
-indeed), while write-locks need to protect themselves against interrupts.
-
-		Linus
-
-----
-
-Reference information:
-
-For dynamic initialization, use spin_lock_init() or rwlock_init() as
-appropriate:
-
-   spinlock_t xxx_lock;
-   rwlock_t xxx_rw_lock;
-
-   static int __init xxx_init(void)
-   {
-	spin_lock_init(&xxx_lock);
-	rwlock_init(&xxx_rw_lock);
-	...
-   }
-
-   module_init(xxx_init);
-
-For static initialization, use DEFINE_SPINLOCK() / DEFINE_RWLOCK() or
-__SPIN_LOCK_UNLOCKED() / __RW_LOCK_UNLOCKED() as appropriate.
diff --git a/Documentation/ww-mutex-design.txt b/Documentation/ww-mutex-design.txt
deleted file mode 100644
index 8a112dc304c3..000000000000
--- a/Documentation/ww-mutex-design.txt
+++ /dev/null
@@ -1,344 +0,0 @@
-Wait/Wound Deadlock-Proof Mutex Design
-======================================
-
-Please read mutex-design.txt first, as it applies to wait/wound mutexes too.
-
-Motivation for WW-Mutexes
--------------------------
-
-GPU's do operations that commonly involve many buffers. Those buffers
-can be shared across contexts/processes, exist in different memory
-domains (for example VRAM vs system memory), and so on. And with
-PRIME / dmabuf, they can even be shared across devices. So there are
-a handful of situations where the driver needs to wait for buffers to
-become ready. If you think about this in terms of waiting on a buffer
-mutex for it to become available, this presents a problem because
-there is no way to guarantee that buffers appear in an execbuf/batch in
-the same order in all contexts. That is directly under control of
-userspace, and a result of the sequence of GL calls that an application
-makes, which results in the potential for deadlock. The problem gets
-more complex when you consider that the kernel may need to migrate the
-buffer(s) into VRAM before the GPU operates on the buffer(s), which
-may in turn require evicting some other buffers (and you don't want to
-evict other buffers which are already queued up to the GPU), but for a
-simplified understanding of the problem you can ignore this.
-
-The algorithm that the TTM graphics subsystem came up with for dealing with
-this problem is quite simple. For each group of buffers (execbuf) that need
-to be locked, the caller would be assigned a unique reservation id/ticket,
-from a global counter. In case of deadlock while locking all the buffers
-associated with an execbuf, the one with the lowest reservation ticket (i.e.
the
-younger task) unlocks all of the buffers that it has already locked, and then
-tries again.
-
-In the RDBMS literature this deadlock handling approach is called wait/wound:
-The older task waits until it can acquire the contended lock. The younger task
-needs to back off and drop all the locks it is currently holding, i.e. the
-younger task is wounded.
-
-Concepts
---------
-
-Compared to normal mutexes two additional concepts/objects show up in the lock
-interface for w/w mutexes:
-
-Acquire context: To ensure eventual forward progress it is important that a
-task trying to acquire locks doesn't grab a new reservation id, but keeps the
-one it acquired when starting the lock acquisition. This ticket is stored in
-the acquire context. Furthermore the acquire context keeps track of debugging
-state to catch w/w mutex interface abuse.
-
-W/w class: In contrast to normal mutexes the lock class needs to be explicit for
-w/w mutexes, since it is required to initialize the acquire context.
-
-Furthermore there are three different classes of w/w lock acquire functions:
-
-* Normal lock acquisition with a context, using ww_mutex_lock.
-
-* Slowpath lock acquisition on the contending lock, used by the wounded task
-  after having dropped all already acquired locks. These functions have the
-  _slow postfix.
-
-  From a simple semantics point-of-view the _slow functions are not strictly
-  required, since simply calling the normal ww_mutex_lock functions on the
-  contending lock (after having dropped all other already acquired locks) will
-  work correctly. After all if no other ww mutex has been acquired yet there's
-  no deadlock potential and hence the ww_mutex_lock call will block and not
-  prematurely return -EDEADLK. The advantage of the _slow functions is in
-  interface safety:
-
-  ww_mutex_lock has a __must_check int return type, whereas ww_mutex_lock_slow
-  has a void return type. Note that since ww mutex code needs loops/retries
-  anyway the __must_check doesn't result in spurious warnings, even though the
-  very first lock operation can never fail.
-
-  When full debugging is enabled ww_mutex_lock_slow checks that all acquired
-  ww mutex have been released (preventing deadlocks) and makes sure that we
-  block on the contending lock (preventing spinning through the -EDEADLK
-  slowpath until the contended lock can be acquired).
-
-* Functions to only acquire a single w/w mutex, which results in the exact same
-  semantics as a normal mutex. This is done by calling ww_mutex_lock with a NULL
-  context.
-
-  Again this is not strictly required. But often you only want to acquire a
-  single lock in which case it's pointless to set up an acquire context (and so
-  better to avoid grabbing a deadlock avoidance ticket).
-
-Of course, all the usual variants for handling wake-ups due to signals are also
-provided.
-
-Usage
------
-
-Three different ways to acquire locks within the same w/w class. Common
-definitions for methods #1 and #2:
-
-static DEFINE_WW_CLASS(ww_class);
-
-struct obj {
-	struct ww_mutex lock;
-	/* obj data */
-};
-
-struct obj_entry {
-	struct list_head head;
-	struct obj *obj;
-};
-
-Method 1, using a list in execbuf->buffers that's not allowed to be reordered.
-This is useful if a list of required objects is already tracked somewhere.
-Furthermore the lock helper can propagate the -EALREADY return code back to
-the caller as a signal that an object is twice on the list.
This is useful if
-the list is constructed from userspace input and the ABI requires userspace to
-not have duplicate entries (e.g. for a gpu commandbuffer submission ioctl).
-
-int lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
-{
-	struct obj *res_obj = NULL;
-	struct obj_entry *contended_entry = NULL;
-	struct obj_entry *entry;
-	int ret;
-
-	ww_acquire_init(ctx, &ww_class);
-
-retry:
-	list_for_each_entry (entry, list, head) {
-		if (entry->obj == res_obj) {
-			res_obj = NULL;
-			continue;
-		}
-		ret = ww_mutex_lock(&entry->obj->lock, ctx);
-		if (ret < 0) {
-			contended_entry = entry;
-			goto err;
-		}
-	}
-
-	ww_acquire_done(ctx);
-	return 0;
-
-err:
-	list_for_each_entry_continue_reverse (entry, list, head)
-		ww_mutex_unlock(&entry->obj->lock);
-
-	if (res_obj)
-		ww_mutex_unlock(&res_obj->lock);
-
-	if (ret == -EDEADLK) {
-		/* we lost out in a seqno race, lock and retry.. */
-		ww_mutex_lock_slow(&contended_entry->obj->lock, ctx);
-		res_obj = contended_entry->obj;
-		goto retry;
-	}
-	ww_acquire_fini(ctx);
-
-	return ret;
-}
-
-Method 2, using a list in execbuf->buffers that can be reordered. Same semantics
-of duplicate entry detection using -EALREADY as method 1 above. But the
-list-reordering allows for a bit more idiomatic code.
-
-int lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
-{
-	struct obj_entry *entry, *entry2;
-	int ret;
-
-	ww_acquire_init(ctx, &ww_class);
-
-	list_for_each_entry (entry, list, head) {
-		ret = ww_mutex_lock(&entry->obj->lock, ctx);
-		if (ret < 0) {
-			entry2 = entry;
-
-			list_for_each_entry_continue_reverse (entry2, list, head)
-				ww_mutex_unlock(&entry2->obj->lock);
-
-			if (ret != -EDEADLK) {
-				ww_acquire_fini(ctx);
-				return ret;
-			}
-
-			/* we lost out in a seqno race, lock and retry.. */
-			ww_mutex_lock_slow(&entry->obj->lock, ctx);
-
-			/*
-			 * Move buf to head of the list, this will point
-			 * buf->next to the first unlocked entry,
-			 * restarting the for loop.
-			 */
-			list_del(&entry->head);
-			list_add(&entry->head, list);
-		}
-	}
-
-	ww_acquire_done(ctx);
-	return 0;
-}
-
-Unlocking works the same way for both methods #1 and #2:
-
-void unlock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
-{
-	struct obj_entry *entry;
-
-	list_for_each_entry (entry, list, head)
-		ww_mutex_unlock(&entry->obj->lock);
-
-	ww_acquire_fini(ctx);
-}
-
-Method 3 is useful if the list of objects is constructed ad-hoc and not upfront,
-e.g. when adjusting edges in a graph where each node has its own ww_mutex lock,
-and edges can only be changed when holding the locks of all involved nodes. w/w
-mutexes are a natural fit for such a case for two reasons:
-- They can handle lock-acquisition in any order which allows us to start walking
-  a graph from a starting point and then iteratively discovering new edges and
-  locking down the nodes those edges connect to.
-- Due to the -EALREADY return code signalling that a given object is already
-  held there's no need for additional book-keeping to break cycles in the graph
-  or keep track of which locks are already held (when using more than one node
-  as a starting point).
-
-Note that this approach differs in two important ways from the above methods:
-- Since the list of objects is dynamically constructed (and might very well be
-  different when retrying due to hitting the -EDEADLK wound condition) there's
-  no need to keep any object on a persistent list when it's not locked. We can
-  therefore move the list_head into the object itself.
-
-- On the other hand the dynamic object list construction also means that the
-  -EALREADY return code can't be propagated.
-
-Note also that methods #1 and #2 can be combined with method #3, e.g. to first
-lock a list of starting nodes (passed in from userspace) using one of the above
-methods, and then lock any additional objects affected by the operations using
-method #3. The backoff/retry procedure will be a bit more involved, since
-when the dynamic locking step hits -EDEADLK we also need to unlock all the
-objects acquired with the fixed list. But the w/w mutex debug checks will catch
-any interface misuse for these cases.
-
-Also, method 3 can't fail the lock acquisition step since it doesn't return
--EALREADY. Of course this would be different when using the _interruptible
-variants, but that's outside of the scope of these examples here.
-
-struct obj {
-	struct ww_mutex ww_mutex;
-	struct list_head locked_list;
-};
-
-static DEFINE_WW_CLASS(ww_class);
-
-void __unlock_objs(struct list_head *list)
-{
-	struct obj *entry, *temp;
-
-	list_for_each_entry_safe (entry, temp, list, locked_list) {
-		/* need to do that before unlocking, since only the current
-		 * lock holder is allowed to use the object */
-		list_del(&entry->locked_list);
-		ww_mutex_unlock(&entry->ww_mutex);
-	}
-}
-
-void lock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
-{
-	struct obj *obj;
-	int ret;
-
-	ww_acquire_init(ctx, &ww_class);
-
-retry:
-	/* re-init loop start state */
-	loop {
-		/* magic code which walks over a graph and decides which objects
-		 * to lock */
-
-		ret = ww_mutex_lock(&obj->ww_mutex, ctx);
-		if (ret == -EALREADY) {
-			/* we have that one already, get to the next object */
-			continue;
-		}
-		if (ret == -EDEADLK) {
-			__unlock_objs(list);
-
-			ww_mutex_lock_slow(&obj->ww_mutex, ctx);
-			list_add(&obj->locked_list, list);
-			goto retry;
-		}
-
-		/* locked a new object, add it to the list */
-		list_add_tail(&obj->locked_list, list);
-	}
-
-	ww_acquire_done(ctx);
-}
-
-void unlock_objs(struct list_head *list, struct ww_acquire_ctx *ctx)
-{
-	__unlock_objs(list);
-	ww_acquire_fini(ctx);
-}
-
-Method 4: Only lock one single object. In that case deadlock detection and
-prevention is obviously overkill, since with grabbing just one lock you can't
-produce a deadlock within just one class. To simplify this case the w/w mutex
-api can be used with a NULL context.
-
-Implementation Details
-----------------------
-
-Design:
-  ww_mutex currently encapsulates a struct mutex; this means no extra overhead
-  for normal mutex locks, which are far more common. As such there is only a
-  small increase in code size if wait/wound mutexes are not used.
-
-  In general, not much contention is expected. The locks are typically used to
-  serialize access to resources for devices. The only way to make wakeups
-  smarter would be at the cost of adding a field to struct mutex_waiter. This
-  would add overhead to all cases where normal mutexes are used, and
-  ww_mutexes are generally less performance sensitive.
-
-Lockdep:
-  Special care has been taken to warn for as many cases of api abuse
-  as possible. Some common api abuses will be caught with
-  CONFIG_DEBUG_MUTEXES, but CONFIG_PROVE_LOCKING is recommended.
-
-  Some of the errors which will be warned about:
-   - Forgetting to call ww_acquire_fini or ww_acquire_init.
-   - Attempting to lock more mutexes after ww_acquire_done.
-   - Attempting to lock the wrong mutex after -EDEADLK and
-     unlocking all mutexes.
- - Attempting to lock the right mutex after -EDEADLK, - before unlocking all mutexes. - - - Calling ww_mutex_lock_slow before -EDEADLK was returned. - - - Unlocking mutexes with the wrong unlock function. - - Calling one of the ww_acquire_* twice on the same context. - - Using a different ww_class for the mutex than for the ww_acquire_ctx. - - Normal lockdep errors that can result in deadlocks. - - Some of the lockdep errors that can result in deadlocks: - - Calling ww_acquire_init to initialize a second ww_acquire_ctx before - having called ww_acquire_fini on the first. - - 'normal' deadlocks that can occur. - -FIXME: Update this section once we have the TASK_DEADLOCK task state flag magic -implemented. diff --git a/MAINTAINERS b/MAINTAINERS index 1acc624ecfd7..aac481fcbf5c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5523,8 +5523,8 @@ M: Ingo Molnar L: linux-kernel@vger.kernel.org T: git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git core/locking S: Maintained -F: Documentation/lockdep*.txt -F: Documentation/lockstat.txt +F: Documentation/locking/lockdep*.txt +F: Documentation/locking/lockstat.txt F: include/linux/lockdep.h F: kernel/locking/ diff --git a/drivers/gpu/drm/drm_modeset_lock.c b/drivers/gpu/drm/drm_modeset_lock.c index 0dc57d5ecd10..3a02e5e3e9f3 100644 --- a/drivers/gpu/drm/drm_modeset_lock.c +++ b/drivers/gpu/drm/drm_modeset_lock.c @@ -35,7 +35,7 @@ * of extra utility/tracking out of our acquire-ctx. This is provided * by drm_modeset_lock / drm_modeset_acquire_ctx. * - * For basic principles of ww_mutex, see: Documentation/ww-mutex-design.txt + * For basic principles of ww_mutex, see: Documentation/locking/ww-mutex-design.txt * * The basic usage pattern is to: * diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 008388f920d7..f388481201cd 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -4,7 +4,7 @@ * Copyright (C) 2006,2007 Red Hat, Inc., Ingo Molnar * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra * - * see Documentation/lockdep-design.txt for more details. + * see Documentation/locking/lockdep-design.txt for more details. */ #ifndef __LINUX_LOCKDEP_H #define __LINUX_LOCKDEP_H diff --git a/include/linux/mutex.h b/include/linux/mutex.h index e4c29418f407..cc31498fc526 100644 --- a/include/linux/mutex.h +++ b/include/linux/mutex.h @@ -133,7 +133,7 @@ static inline int mutex_is_locked(struct mutex *lock) /* * See kernel/locking/mutex.c for detailed documentation of these APIs. - * Also see Documentation/mutex-design.txt. + * Also see Documentation/locking/mutex-design.txt. */ #ifdef CONFIG_DEBUG_LOCK_ALLOC extern void mutex_lock_nested(struct mutex *lock, unsigned int subclass); diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 035d3c57fc8a..8f498cdde280 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -149,7 +149,7 @@ extern void downgrade_write(struct rw_semaphore *sem); * static then another method for expressing nested locking is * the explicit definition of lock class keys and the use of * lockdep_set_class() at lock initialization time. - * See Documentation/lockdep-design.txt for more details.) + * See Documentation/locking/lockdep-design.txt for more details.) 
*/ extern void down_read_nested(struct rw_semaphore *sem, int subclass); extern void down_write_nested(struct rw_semaphore *sem, int subclass); diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 0d8b6ed93874..dadbf88c22c4 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -15,7 +15,7 @@ * by Steven Rostedt, based on work by Gregory Haskins, Peter Morreale * and Sven Dietrich. * - * Also see Documentation/mutex-design.txt. + * Also see Documentation/locking/mutex-design.txt. */ #include #include diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index a0ea2a141b3b..7c98873a3077 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -8,7 +8,7 @@ * Copyright (C) 2005 Kihon Technologies Inc., Steven Rostedt * Copyright (C) 2006 Esben Nielsen * - * See Documentation/rt-mutex-design.txt for details. + * See Documentation/locking/rt-mutex-design.txt for details. */ #include #include diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 901096d31c66..9b94a063e26c 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -924,7 +924,7 @@ config PROVE_LOCKING the proof of observed correctness is also maintained for an arbitrary combination of these separate locking variants. - For more details, see Documentation/lockdep-design.txt. + For more details, see Documentation/locking/lockdep-design.txt. config LOCKDEP bool @@ -945,7 +945,7 @@ config LOCK_STAT help This feature enables tracking lock contention points - For more details, see Documentation/lockstat.txt + For more details, see Documentation/locking/lockstat.txt This also enables lock events required by "perf lock", subcommand of perf. -- cgit v1.2.3 From 4999201a59ef555f9105d2bb2459ed895627f7aa Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 8 Aug 2014 12:35:36 +0200 Subject: locking/spinlocks: Always evaluate the second argument of spin_lock_nested() Evaluating a macro argument only if certain configuration options have been selected is confusing and error-prone. Hence always evaluate the second argument of spin_lock_nested(). An intentional side effect of this patch is that it avoids that the following warning is reported for netif_addr_lock_nested() when building with CONFIG_DEBUG_LOCK_ALLOC=n and with W=1: include/linux/netdevice.h: In function 'netif_addr_lock_nested': include/linux/netdevice.h:2865:6: warning: variable 'subclass' set but not used [-Wunused-but-set-variable] int subclass = SINGLE_DEPTH_NESTING; ^ Signed-off-by: Bart Van Assche Signed-off-by: Peter Zijlstra Cc: David Rientjes Cc: David S. Miller Cc: Andrew Morton Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Paul E. McKenney Link: http://lkml.kernel.org/r/53E4A7F8.1040700@acm.org Signed-off-by: Ingo Molnar --- include/linux/spinlock.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 3f2867ff0ced..262ba4ef9a8e 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -197,7 +197,13 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) _raw_spin_lock_nest_lock(lock, &(nest_lock)->dep_map); \ } while (0) #else -# define raw_spin_lock_nested(lock, subclass) _raw_spin_lock(lock) +/* + * Always evaluate the 'subclass' argument to avoid that the compiler + * warns about set-but-not-used variables when building with + * CONFIG_DEBUG_LOCK_ALLOC=n and with W=1. 
+ */ +# define raw_spin_lock_nested(lock, subclass) \ + _raw_spin_lock(((void)(subclass), (lock))) # define raw_spin_lock_nest_lock(lock, nest_lock) _raw_spin_lock(lock) #endif -- cgit v1.2.3 From f0bab73cb539fb803c4d419951e8d28aa4964f8f Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Wed, 6 Aug 2014 13:22:01 -0400 Subject: locking/lockdep: Restrict the use of recursive read_lock() with qrwlock Unlike the original unfair rwlock implementation, queued rwlock will grant lock according to the chronological sequence of the lock requests except when the lock requester is in the interrupt context. Consequently, recursive read_lock calls will now hang the process if there is a write_lock call somewhere in between the read_lock calls. This patch updates the lockdep implementation to look for recursive read_lock calls. A new read state (3) is used to mark those read_lock call that cannot be recursively called except in the interrupt context. The new read state does exhaust the 2 bits available in held_lock:read bit field. The addition of any new read state in the future may require a redesign of how all those bits are squeezed together in the held_lock structure. Signed-off-by: Waiman Long Signed-off-by: Peter Zijlstra Cc: Maarten Lankhorst Cc: Rik van Riel Cc: Scott J Norton Cc: Fengguang Wu Cc: Linus Torvalds Link: http://lkml.kernel.org/r/1407345722-61615-2-git-send-email-Waiman.Long@hp.com Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 10 +++++++++- kernel/locking/lockdep.c | 6 ++++++ 2 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index f388481201cd..b5a84b62fb84 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -478,16 +478,24 @@ static inline void print_irqtrace_events(struct task_struct *curr) * on the per lock-class debug mode: */ +/* + * Read states in the 2-bit held_lock:read field: + * 0: Exclusive lock + * 1: Shareable lock, cannot be recursively called + * 2: Shareable lock, can be recursively called + * 3: Shareable lock, cannot be recursively called except in interrupt context + */ #define lock_acquire_exclusive(l, s, t, n, i) lock_acquire(l, s, t, 0, 1, n, i) #define lock_acquire_shared(l, s, t, n, i) lock_acquire(l, s, t, 1, 1, n, i) #define lock_acquire_shared_recursive(l, s, t, n, i) lock_acquire(l, s, t, 2, 1, n, i) +#define lock_acquire_shared_irecursive(l, s, t, n, i) lock_acquire(l, s, t, 3, 1, n, i) #define spin_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) #define spin_acquire_nest(l, s, t, n, i) lock_acquire_exclusive(l, s, t, n, i) #define spin_release(l, n, i) lock_release(l, n, i) #define rwlock_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) -#define rwlock_acquire_read(l, s, t, i) lock_acquire_shared_recursive(l, s, t, NULL, i) +#define rwlock_acquire_read(l, s, t, i) lock_acquire_shared_irecursive(l, s, t, NULL, i) #define rwlock_release(l, n, i) lock_release(l, n, i) #define seqcount_acquire(l, s, t, i) lock_acquire_exclusive(l, s, t, NULL, i) diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 88d0d4420ad2..420ba685c4e5 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -3597,6 +3597,12 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, raw_local_irq_save(flags); check_flags(flags); + /* + * An interrupt recursive read in interrupt context can be considered + * to be the same as a recursive read from checking perspective. 
+ */ + if ((read == 3) && in_interrupt()) + read = 2; current->lockdep_recursion = 1; trace_lock_acquire(lock, subclass, trylock, read, check, nest_lock, ip); __lock_acquire(lock, subclass, trylock, read, check, -- cgit v1.2.3 From 515d9b2c03943ca904cd135e1b1d9ddd168c1b27 Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Tue, 12 Aug 2014 18:22:27 +0200 Subject: ata: remove deprecated struct ahci_platform_data The last user of the deprecated struct ahci_platform_data has been cleaned up recently (SPEAr1340 got a proper PHY driver). Signed-off-by: Bartlomiej Zolnierkiewicz Acked-by: Hans de Goede Signed-off-by: Tejun Heo --- drivers/ata/ahci_platform.c | 18 +----------------- drivers/ata/libahci_platform.c | 23 ----------------------- include/linux/ahci_platform.h | 13 ------------- 3 files changed, 1 insertion(+), 53 deletions(-) (limited to 'include/linux') diff --git a/drivers/ata/ahci_platform.c b/drivers/ata/ahci_platform.c index f61ddb9146d6..06f1d59fa678 100644 --- a/drivers/ata/ahci_platform.c +++ b/drivers/ata/ahci_platform.c @@ -32,7 +32,6 @@ static const struct ata_port_info ahci_port_info = { static int ahci_probe(struct platform_device *pdev) { struct device *dev = &pdev->dev; - struct ahci_platform_data *pdata = dev_get_platdata(dev); struct ahci_host_priv *hpriv; int rc; @@ -44,29 +43,14 @@ static int ahci_probe(struct platform_device *pdev) if (rc) return rc; - /* - * Some platforms might need to prepare for mmio region access, - * which could be done in the following init call. So, the mmio - * region shouldn't be accessed before init (if provided) has - * returned successfully. - */ - if (pdata && pdata->init) { - rc = pdata->init(dev, hpriv->mmio); - if (rc) - goto disable_resources; - } - if (of_device_is_compatible(dev->of_node, "hisilicon,hisi-ahci")) hpriv->flags |= AHCI_HFLAG_NO_FBS | AHCI_HFLAG_NO_NCQ; rc = ahci_platform_init_host(pdev, hpriv, &ahci_port_info); if (rc) - goto pdata_exit; + goto disable_resources; return 0; -pdata_exit: - if (pdata && pdata->exit) - pdata->exit(dev); disable_resources: ahci_platform_disable_resources(hpriv); return rc; diff --git a/drivers/ata/libahci_platform.c b/drivers/ata/libahci_platform.c index 5b92c290e6c6..c0510de8a4c9 100644 --- a/drivers/ata/libahci_platform.c +++ b/drivers/ata/libahci_platform.c @@ -502,13 +502,8 @@ EXPORT_SYMBOL_GPL(ahci_platform_init_host); static void ahci_host_stop(struct ata_host *host) { - struct device *dev = host->dev; - struct ahci_platform_data *pdata = dev_get_platdata(dev); struct ahci_host_priv *hpriv = host->private_data; - if (pdata && pdata->exit) - pdata->exit(dev); - ahci_platform_disable_resources(hpriv); } @@ -592,7 +587,6 @@ EXPORT_SYMBOL_GPL(ahci_platform_resume_host); */ int ahci_platform_suspend(struct device *dev) { - struct ahci_platform_data *pdata = dev_get_platdata(dev); struct ata_host *host = dev_get_drvdata(dev); struct ahci_host_priv *hpriv = host->private_data; int rc; @@ -601,19 +595,9 @@ int ahci_platform_suspend(struct device *dev) if (rc) return rc; - if (pdata && pdata->suspend) { - rc = pdata->suspend(dev); - if (rc) - goto resume_host; - } - ahci_platform_disable_resources(hpriv); return 0; - -resume_host: - ahci_platform_resume_host(dev); - return rc; } EXPORT_SYMBOL_GPL(ahci_platform_suspend); @@ -629,7 +613,6 @@ EXPORT_SYMBOL_GPL(ahci_platform_suspend); */ int ahci_platform_resume(struct device *dev) { - struct ahci_platform_data *pdata = dev_get_platdata(dev); struct ata_host *host = dev_get_drvdata(dev); struct ahci_host_priv *hpriv = 
host->private_data; int rc; @@ -638,12 +621,6 @@ int ahci_platform_resume(struct device *dev) if (rc) return rc; - if (pdata && pdata->resume) { - rc = pdata->resume(dev); - if (rc) - goto disable_resources; - } - rc = ahci_platform_resume_host(dev); if (rc) goto disable_resources; diff --git a/include/linux/ahci_platform.h b/include/linux/ahci_platform.h index 09a947e8bc87..642d6ae4030c 100644 --- a/include/linux/ahci_platform.h +++ b/include/linux/ahci_platform.h @@ -22,19 +22,6 @@ struct ata_port_info; struct ahci_host_priv; struct platform_device; -/* - * Note ahci_platform_data is deprecated, it is only kept around for use - * by the old da850 and spear13xx ahci code. - * New drivers should instead declare their own platform_driver struct, and - * use ahci_platform* functions in their own probe, suspend and resume methods. - */ -struct ahci_platform_data { - int (*init)(struct device *dev, void __iomem *addr); - void (*exit)(struct device *dev); - int (*suspend)(struct device *dev); - int (*resume)(struct device *dev); -}; - int ahci_platform_enable_clks(struct ahci_host_priv *hpriv); void ahci_platform_disable_clks(struct ahci_host_priv *hpriv); int ahci_platform_enable_resources(struct ahci_host_priv *hpriv); -- cgit v1.2.3 From 005547e0828ce9064afebb1e6d56a18efd80e7a3 Mon Sep 17 00:00:00 2001 From: James Ban Date: Fri, 8 Aug 2014 14:27:04 +0900 Subject: regulator: da9211: support DA9213 This is a patch for supporting DA9213. Signed-off-by: James Ban Signed-off-by: Mark Brown --- drivers/regulator/Kconfig | 9 ++-- drivers/regulator/da9211-regulator.c | 95 ++++++++++++++++++++++++++++-------- drivers/regulator/da9211-regulator.h | 7 ++- include/linux/regulator/da9211.h | 7 ++- 4 files changed, 91 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index 2dc8289e5dba..1344aa83b438 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -199,13 +199,14 @@ config REGULATOR_DA9210 interface. config REGULATOR_DA9211 - tristate "Dialog Semiconductor DA9211/DA9212 regulator" + tristate "Dialog Semiconductor DA9211/DA9212/DA9213/DA9214 regulator" depends on I2C select REGMAP_I2C help - Say y here to support for the Dialog Semiconductor DA9211/DA9212. - The DA9211/DA9212 is a multi-phase synchronous step down - converter 12A DC-DC Buck controlled through an I2C + Say y here to support for the Dialog Semiconductor DA9211/DA9212 + /DA9213/DA9214. + The DA9211/DA9212/DA9213/DA9214 is a multi-phase synchronous + step down converter 12A or 16A DC-DC Buck controlled through an I2C interface. config REGULATOR_DBX500_PRCMU diff --git a/drivers/regulator/da9211-regulator.c b/drivers/regulator/da9211-regulator.c index 1482adafa1ad..ccc2e362d751 100644 --- a/drivers/regulator/da9211-regulator.c +++ b/drivers/regulator/da9211-regulator.c @@ -1,5 +1,5 @@ /* - * da9211-regulator.c - Regulator device driver for DA9211 + * da9211-regulator.c - Regulator device driver for DA9211/DA9213 * Copyright (C) 2014 Dialog Semiconductor Ltd. 
* * This library is free software; you can redistribute it and/or @@ -27,6 +27,10 @@ #include #include "da9211-regulator.h" +/* DEVICE IDs */ +#define DA9211_DEVICE_ID 0x22 +#define DA9213_DEVICE_ID 0x23 + #define DA9211_BUCK_MODE_SLEEP 1 #define DA9211_BUCK_MODE_SYNC 2 #define DA9211_BUCK_MODE_AUTO 3 @@ -42,6 +46,7 @@ struct da9211 { struct regulator_dev *rdev[DA9211_MAX_REGULATORS]; int num_regulator; int chip_irq; + int chip_id; }; static const struct regmap_range_cfg da9211_regmap_range[] = { @@ -52,14 +57,14 @@ static const struct regmap_range_cfg da9211_regmap_range[] = { .window_start = 0, .window_len = 256, .range_min = 0, - .range_max = 2*256, + .range_max = 5*128, }, }; static const struct regmap_config da9211_regmap_config = { .reg_bits = 8, .val_bits = 8, - .max_register = 2 * 256, + .max_register = 5 * 128, .ranges = da9211_regmap_range, .num_ranges = ARRAY_SIZE(da9211_regmap_range), }; @@ -69,11 +74,20 @@ static const struct regmap_config da9211_regmap_config = { #define DA9211_MAX_MV 1570 #define DA9211_STEP_MV 10 -/* Current limits for buck (uA) indices corresponds with register values */ +/* Current limits for DA9211 buck (uA) indices + * corresponds with register values + */ static const int da9211_current_limits[] = { 2000000, 2200000, 2400000, 2600000, 2800000, 3000000, 3200000, 3400000, 3600000, 3800000, 4000000, 4200000, 4400000, 4600000, 4800000, 5000000 }; +/* Current limits for DA9213 buck (uA) indices + * corresponds with register values + */ +static const int da9213_current_limits[] = { + 3000000, 3200000, 3400000, 3600000, 3800000, 4000000, 4200000, 4400000, + 4600000, 4800000, 5000000, 5200000, 5400000, 5600000, 5800000, 6000000 +}; static unsigned int da9211_buck_get_mode(struct regulator_dev *rdev) { @@ -129,12 +143,26 @@ static int da9211_set_current_limit(struct regulator_dev *rdev, int min, { int id = rdev_get_id(rdev); struct da9211 *chip = rdev_get_drvdata(rdev); - int i; + int i, max_size; + const int *current_limits; + + switch (chip->chip_id) { + case DA9211: + current_limits = da9211_current_limits; + max_size = ARRAY_SIZE(da9211_current_limits)-1; + break; + case DA9213: + current_limits = da9213_current_limits; + max_size = ARRAY_SIZE(da9213_current_limits)-1; + break; + default: + return -EINVAL; + } /* search for closest to maximum */ - for (i = ARRAY_SIZE(da9211_current_limits)-1; i >= 0; i--) { - if (min <= da9211_current_limits[i] && - max >= da9211_current_limits[i]) { + for (i = max_size; i >= 0; i--) { + if (min <= current_limits[i] && + max >= current_limits[i]) { return regmap_update_bits(chip->regmap, DA9211_REG_BUCK_ILIM, (0x0F << id*4), (i << id*4)); @@ -150,14 +178,28 @@ static int da9211_get_current_limit(struct regulator_dev *rdev) struct da9211 *chip = rdev_get_drvdata(rdev); unsigned int data; int ret; + const int *current_limits; + + switch (chip->chip_id) { + case DA9211: + current_limits = da9211_current_limits; + break; + case DA9213: + current_limits = da9213_current_limits; + break; + default: + return -EINVAL; + } ret = regmap_read(chip->regmap, DA9211_REG_BUCK_ILIM, &data); if (ret < 0) return ret; - /* select one of 16 values: 0000 (2000mA) to 1111 (5000mA) */ + /* select one of 16 values: 0000 (2000mA or 3000mA) + * to 1111 (5000mA or 6000mA). 
+ */ data = (data >> id*4) & 0x0F; - return da9211_current_limits[data]; + return current_limits[data]; } static struct regulator_ops da9211_buck_ops = { @@ -264,10 +306,7 @@ static int da9211_regulator_init(struct da9211 *chip) } for (i = 0; i < chip->num_regulator; i++) { - if (chip->pdata) - config.init_data = - &(chip->pdata->init_data[i]); - + config.init_data = &(chip->pdata->init_data[i]); config.dev = chip->dev; config.driver_data = chip; config.regmap = chip->regmap; @@ -301,6 +340,7 @@ static int da9211_i2c_probe(struct i2c_client *i2c, { struct da9211 *chip; int error, ret; + unsigned int data; chip = devm_kzalloc(&i2c->dev, sizeof(struct da9211), GFP_KERNEL); @@ -308,7 +348,7 @@ static int da9211_i2c_probe(struct i2c_client *i2c, chip->regmap = devm_regmap_init_i2c(i2c, &da9211_regmap_config); if (IS_ERR(chip->regmap)) { error = PTR_ERR(chip->regmap); - dev_err(&i2c->dev, "Failed to allocate register map: %d\n", + dev_err(chip->dev, "Failed to allocate register map: %d\n", error); return error; } @@ -316,8 +356,22 @@ static int da9211_i2c_probe(struct i2c_client *i2c, i2c_set_clientdata(i2c, chip); chip->pdata = i2c->dev.platform_data; - if (!chip->pdata) { - dev_err(&i2c->dev, "No platform init data supplied\n"); + + ret = regmap_read(chip->regmap, DA9211_REG_DEVICE_ID, &data); + if (ret < 0) { + dev_err(chip->dev, "Failed to read DEVICE_ID reg: %d\n", ret); + return ret; + } + + switch (data) { + case DA9211_DEVICE_ID: + chip->chip_id = DA9211; + break; + case DA9213_DEVICE_ID: + chip->chip_id = DA9213; + break; + default: + dev_err(chip->dev, "Unsupported device id = 0x%x.\n", data); return -ENODEV; } @@ -340,13 +394,14 @@ static int da9211_i2c_probe(struct i2c_client *i2c, ret = da9211_regulator_init(chip); if (ret < 0) - dev_err(&i2c->dev, "Failed to initialize regulator: %d\n", ret); + dev_err(chip->dev, "Failed to initialize regulator: %d\n", ret); return ret; } static const struct i2c_device_id da9211_i2c_id[] = { - {"da9211", 0}, + {"da9211", DA9211}, + {"da9213", DA9213}, {}, }; @@ -364,5 +419,5 @@ static struct i2c_driver da9211_regulator_driver = { module_i2c_driver(da9211_regulator_driver); MODULE_AUTHOR("James Ban "); -MODULE_DESCRIPTION("Regulator device driver for Dialog DA9211"); +MODULE_DESCRIPTION("Regulator device driver for Dialog DA9211/DA9213"); MODULE_LICENSE("GPL v2"); diff --git a/drivers/regulator/da9211-regulator.h b/drivers/regulator/da9211-regulator.h index 88b1769e8058..93fa9df2721c 100644 --- a/drivers/regulator/da9211-regulator.h +++ b/drivers/regulator/da9211-regulator.h @@ -1,5 +1,5 @@ /* - * da9211-regulator.h - Regulator definitions for DA9211 + * da9211-regulator.h - Regulator definitions for DA9211/DA9213 * Copyright (C) 2014 Dialog Semiconductor Ltd. 
* * This library is free software; you can redistribute it and/or @@ -53,12 +53,15 @@ /* BUCK Phase Selection*/ #define DA9211_REG_CONFIG_E 0x147 +/* Device ID */ +#define DA9211_REG_DEVICE_ID 0x201 + /* * Registers bits */ /* DA9211_REG_PAGE_CON (addr=0x00) */ #define DA9211_REG_PAGE_SHIFT 1 -#define DA9211_REG_PAGE_MASK 0x02 +#define DA9211_REG_PAGE_MASK 0x06 /* On I2C registers 0x00 - 0xFF */ #define DA9211_REG_PAGE0 0 /* On I2C registers 0x100 - 0x1FF */ diff --git a/include/linux/regulator/da9211.h b/include/linux/regulator/da9211.h index 0981ce0e72cc..658c3c33f4c0 100644 --- a/include/linux/regulator/da9211.h +++ b/include/linux/regulator/da9211.h @@ -1,5 +1,5 @@ /* - * da9211.h - Regulator device driver for DA9211 + * da9211.h - Regulator device driver for DA9211/DA9213 * Copyright (C) 2014 Dialog Semiconductor Ltd. * * This library is free software; you can redistribute it and/or @@ -20,6 +20,11 @@ #define DA9211_MAX_REGULATORS 2 +enum da9211_chip_id { + DA9211, + DA9213, +}; + struct da9211_pdata { /* * Number of buck -- cgit v1.2.3 From 0e4f417857083f399769491f6e7773d111debd0f Mon Sep 17 00:00:00 2001 From: Amit Daniel Kachhap Date: Tue, 15 Jul 2014 16:32:51 +0530 Subject: regulator: s2mpxxx: Move regulator min/step voltages in common place This is a cleanup patch and moves min/step voltages in a common samsung header file so that they can be used by other s2mpxxx PMIC drivers. Only few required macros are added currently and others can be added if needed. Reviewed-by: Krzysztof Kozlowski Signed-off-by: Amit Daniel Kachhap Acked-by: Lee Jones Signed-off-by: Mark Brown --- drivers/regulator/s2mpa01.c | 32 ++++++++++++------------ drivers/regulator/s2mps11.c | 50 ++++++++++++++++++------------------- include/linux/mfd/samsung/core.h | 21 ++++++++++++++++ include/linux/mfd/samsung/s2mpa01.h | 12 --------- include/linux/mfd/samsung/s2mps11.h | 9 ------- include/linux/mfd/samsung/s2mps14.h | 10 -------- 6 files changed, 62 insertions(+), 72 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/s2mpa01.c b/drivers/regulator/s2mpa01.c index ee83b4876420..962c5f192f7c 100644 --- a/drivers/regulator/s2mpa01.c +++ b/drivers/regulator/s2mpa01.c @@ -241,8 +241,8 @@ static struct regulator_ops s2mpa01_buck_ops = { .ops = &s2mpa01_ldo_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPA01_LDO_MIN, \ - .uV_step = S2MPA01_LDO_STEP1, \ + .min_uV = MIN_800_MV, \ + .uV_step = STEP_50_MV, \ .n_voltages = S2MPA01_LDO_N_VOLTAGES, \ .vsel_reg = S2MPA01_REG_L1CTRL + num - 1, \ .vsel_mask = S2MPA01_LDO_VSEL_MASK, \ @@ -255,8 +255,8 @@ static struct regulator_ops s2mpa01_buck_ops = { .ops = &s2mpa01_ldo_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPA01_LDO_MIN, \ - .uV_step = S2MPA01_LDO_STEP2, \ + .min_uV = MIN_800_MV, \ + .uV_step = STEP_25_MV, \ .n_voltages = S2MPA01_LDO_N_VOLTAGES, \ .vsel_reg = S2MPA01_REG_L1CTRL + num - 1, \ .vsel_mask = S2MPA01_LDO_VSEL_MASK, \ @@ -270,8 +270,8 @@ static struct regulator_ops s2mpa01_buck_ops = { .ops = &s2mpa01_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPA01_BUCK_MIN1, \ - .uV_step = S2MPA01_BUCK_STEP1, \ + .min_uV = MIN_600_MV, \ + .uV_step = STEP_6_25_MV, \ .n_voltages = S2MPA01_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPA01_RAMP_DELAY, \ .vsel_reg = S2MPA01_REG_B1CTRL2 + (num - 1) * 2, \ @@ -286,8 +286,8 @@ static struct regulator_ops s2mpa01_buck_ops = { .ops = &s2mpa01_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = 
S2MPA01_BUCK_MIN2, \ - .uV_step = S2MPA01_BUCK_STEP1, \ + .min_uV = MIN_800_MV, \ + .uV_step = STEP_6_25_MV, \ .n_voltages = S2MPA01_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPA01_RAMP_DELAY, \ .vsel_reg = S2MPA01_REG_B5CTRL2, \ @@ -302,8 +302,8 @@ static struct regulator_ops s2mpa01_buck_ops = { .ops = &s2mpa01_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPA01_BUCK_MIN1, \ - .uV_step = S2MPA01_BUCK_STEP1, \ + .min_uV = MIN_600_MV, \ + .uV_step = STEP_6_25_MV, \ .n_voltages = S2MPA01_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPA01_RAMP_DELAY, \ .vsel_reg = S2MPA01_REG_B6CTRL2 + (num - 6) * 2, \ @@ -318,8 +318,8 @@ static struct regulator_ops s2mpa01_buck_ops = { .ops = &s2mpa01_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPA01_BUCK_MIN2, \ - .uV_step = S2MPA01_BUCK_STEP2, \ + .min_uV = MIN_800_MV, \ + .uV_step = STEP_12_5_MV, \ .n_voltages = S2MPA01_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPA01_RAMP_DELAY, \ .vsel_reg = S2MPA01_REG_B8CTRL2, \ @@ -334,8 +334,8 @@ static struct regulator_ops s2mpa01_buck_ops = { .ops = &s2mpa01_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPA01_BUCK_MIN4, \ - .uV_step = S2MPA01_BUCK_STEP2, \ + .min_uV = MIN_1500_MV, \ + .uV_step = STEP_12_5_MV, \ .n_voltages = S2MPA01_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPA01_RAMP_DELAY, \ .vsel_reg = S2MPA01_REG_B9CTRL2, \ @@ -350,8 +350,8 @@ static struct regulator_ops s2mpa01_buck_ops = { .ops = &s2mpa01_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPA01_BUCK_MIN3, \ - .uV_step = S2MPA01_BUCK_STEP2, \ + .min_uV = MIN_1000_MV, \ + .uV_step = STEP_12_5_MV, \ .n_voltages = S2MPA01_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPA01_RAMP_DELAY, \ .vsel_reg = S2MPA01_REG_B10CTRL2, \ diff --git a/drivers/regulator/s2mps11.c b/drivers/regulator/s2mps11.c index b16c53a8272f..3dede776a837 100644 --- a/drivers/regulator/s2mps11.c +++ b/drivers/regulator/s2mps11.c @@ -255,14 +255,14 @@ static struct regulator_ops s2mps11_buck_ops = { .set_ramp_delay = s2mps11_set_ramp_delay, }; -#define regulator_desc_s2mps11_ldo1(num) { \ +#define regulator_desc_s2mps11_ldo1(num) { \ .name = "LDO"#num, \ .id = S2MPS11_LDO##num, \ .ops = &s2mps11_ldo_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS11_LDO_MIN, \ - .uV_step = S2MPS11_LDO_STEP1, \ + .min_uV = MIN_800_MV, \ + .uV_step = STEP_50_MV, \ .n_voltages = S2MPS11_LDO_N_VOLTAGES, \ .vsel_reg = S2MPS11_REG_L1CTRL + num - 1, \ .vsel_mask = S2MPS11_LDO_VSEL_MASK, \ @@ -275,8 +275,8 @@ static struct regulator_ops s2mps11_buck_ops = { .ops = &s2mps11_ldo_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS11_LDO_MIN, \ - .uV_step = S2MPS11_LDO_STEP2, \ + .min_uV = MIN_800_MV, \ + .uV_step = STEP_25_MV, \ .n_voltages = S2MPS11_LDO_N_VOLTAGES, \ .vsel_reg = S2MPS11_REG_L1CTRL + num - 1, \ .vsel_mask = S2MPS11_LDO_VSEL_MASK, \ @@ -290,8 +290,8 @@ static struct regulator_ops s2mps11_buck_ops = { .ops = &s2mps11_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS11_BUCK_MIN1, \ - .uV_step = S2MPS11_BUCK_STEP1, \ + .min_uV = MIN_600_MV, \ + .uV_step = STEP_6_25_MV, \ .n_voltages = S2MPS11_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPS11_RAMP_DELAY, \ .vsel_reg = S2MPS11_REG_B1CTRL2 + (num - 1) * 2, \ @@ -306,8 +306,8 @@ static struct regulator_ops s2mps11_buck_ops = { .ops = &s2mps11_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS11_BUCK_MIN1, \ - .uV_step = S2MPS11_BUCK_STEP1, \ + .min_uV 
= MIN_600_MV, \ + .uV_step = STEP_6_25_MV, \ .n_voltages = S2MPS11_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPS11_RAMP_DELAY, \ .vsel_reg = S2MPS11_REG_B5CTRL2, \ @@ -322,8 +322,8 @@ static struct regulator_ops s2mps11_buck_ops = { .ops = &s2mps11_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS11_BUCK_MIN1, \ - .uV_step = S2MPS11_BUCK_STEP1, \ + .min_uV = MIN_600_MV, \ + .uV_step = STEP_6_25_MV, \ .n_voltages = S2MPS11_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPS11_RAMP_DELAY, \ .vsel_reg = S2MPS11_REG_B6CTRL2 + (num - 6) * 2, \ @@ -338,8 +338,8 @@ static struct regulator_ops s2mps11_buck_ops = { .ops = &s2mps11_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS11_BUCK_MIN3, \ - .uV_step = S2MPS11_BUCK_STEP3, \ + .min_uV = MIN_3000_MV, \ + .uV_step = STEP_25_MV, \ .n_voltages = S2MPS11_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPS11_RAMP_DELAY, \ .vsel_reg = S2MPS11_REG_B9CTRL2, \ @@ -354,8 +354,8 @@ static struct regulator_ops s2mps11_buck_ops = { .ops = &s2mps11_buck_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS11_BUCK_MIN2, \ - .uV_step = S2MPS11_BUCK_STEP2, \ + .min_uV = MIN_750_MV, \ + .uV_step = STEP_12_5_MV, \ .n_voltages = S2MPS11_BUCK_N_VOLTAGES, \ .ramp_delay = S2MPS11_RAMP_DELAY, \ .vsel_reg = S2MPS11_REG_B10CTRL2, \ @@ -516,8 +516,8 @@ static struct regulator_ops s2mps14_reg_ops = { .ops = &s2mps14_reg_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS14_LDO_MIN_800MV, \ - .uV_step = S2MPS14_LDO_STEP_25MV, \ + .min_uV = MIN_800_MV, \ + .uV_step = STEP_25_MV, \ .n_voltages = S2MPS14_LDO_N_VOLTAGES, \ .vsel_reg = S2MPS14_REG_L1CTRL + num - 1, \ .vsel_mask = S2MPS14_LDO_VSEL_MASK, \ @@ -530,8 +530,8 @@ static struct regulator_ops s2mps14_reg_ops = { .ops = &s2mps14_reg_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS14_LDO_MIN_1800MV, \ - .uV_step = S2MPS14_LDO_STEP_25MV, \ + .min_uV = MIN_1800_MV, \ + .uV_step = STEP_25_MV, \ .n_voltages = S2MPS14_LDO_N_VOLTAGES, \ .vsel_reg = S2MPS14_REG_L1CTRL + num - 1, \ .vsel_mask = S2MPS14_LDO_VSEL_MASK, \ @@ -544,8 +544,8 @@ static struct regulator_ops s2mps14_reg_ops = { .ops = &s2mps14_reg_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS14_LDO_MIN_800MV, \ - .uV_step = S2MPS14_LDO_STEP_12_5MV, \ + .min_uV = MIN_800_MV, \ + .uV_step = STEP_12_5_MV, \ .n_voltages = S2MPS14_LDO_N_VOLTAGES, \ .vsel_reg = S2MPS14_REG_L1CTRL + num - 1, \ .vsel_mask = S2MPS14_LDO_VSEL_MASK, \ @@ -558,8 +558,8 @@ static struct regulator_ops s2mps14_reg_ops = { .ops = &s2mps14_reg_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS14_BUCK1235_MIN_600MV, \ - .uV_step = S2MPS14_BUCK1235_STEP_6_25MV, \ + .min_uV = MIN_600_MV, \ + .uV_step = STEP_6_25_MV, \ .n_voltages = S2MPS14_BUCK_N_VOLTAGES, \ .linear_min_sel = S2MPS14_BUCK1235_START_SEL, \ .ramp_delay = S2MPS14_BUCK_RAMP_DELAY, \ @@ -574,8 +574,8 @@ static struct regulator_ops s2mps14_reg_ops = { .ops = &s2mps14_reg_ops, \ .type = REGULATOR_VOLTAGE, \ .owner = THIS_MODULE, \ - .min_uV = S2MPS14_BUCK4_MIN_1400MV, \ - .uV_step = S2MPS14_BUCK4_STEP_12_5MV, \ + .min_uV = MIN_1400_MV, \ + .uV_step = STEP_12_5_MV, \ .n_voltages = S2MPS14_BUCK_N_VOLTAGES, \ .linear_min_sel = S2MPS14_BUCK4_START_SEL, \ .ramp_delay = S2MPS14_BUCK_RAMP_DELAY, \ diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h index b5f73de81aad..1825edacbda7 100644 --- a/include/linux/mfd/samsung/core.h +++ 
b/include/linux/mfd/samsung/core.h @@ -14,6 +14,27 @@ #ifndef __LINUX_MFD_SEC_CORE_H #define __LINUX_MFD_SEC_CORE_H +/* Macros to represent minimum voltages for LDO/BUCK */ +#define MIN_3000_MV 3000000 +#define MIN_2500_MV 2500000 +#define MIN_2000_MV 2000000 +#define MIN_1800_MV 1800000 +#define MIN_1500_MV 1500000 +#define MIN_1400_MV 1400000 +#define MIN_1000_MV 1000000 + +#define MIN_900_MV 900000 +#define MIN_850_MV 850000 +#define MIN_800_MV 800000 +#define MIN_750_MV 750000 +#define MIN_600_MV 600000 + +/* Macros to represent steps for LDO/BUCK */ +#define STEP_50_MV 50000 +#define STEP_25_MV 25000 +#define STEP_12_5_MV 12500 +#define STEP_6_25_MV 6250 + enum sec_device_type { S5M8751X, S5M8763X, diff --git a/include/linux/mfd/samsung/s2mpa01.h b/include/linux/mfd/samsung/s2mpa01.h index fbc63bc0d6a2..2766108bca2f 100644 --- a/include/linux/mfd/samsung/s2mpa01.h +++ b/include/linux/mfd/samsung/s2mpa01.h @@ -155,18 +155,6 @@ enum s2mpa01_regulators { S2MPA01_REGULATOR_MAX, }; -#define S2MPA01_BUCK_MIN1 600000 -#define S2MPA01_BUCK_MIN2 800000 -#define S2MPA01_BUCK_MIN3 1000000 -#define S2MPA01_BUCK_MIN4 1500000 -#define S2MPA01_LDO_MIN 800000 - -#define S2MPA01_BUCK_STEP1 6250 -#define S2MPA01_BUCK_STEP2 12500 - -#define S2MPA01_LDO_STEP1 50000 -#define S2MPA01_LDO_STEP2 25000 - #define S2MPA01_LDO_VSEL_MASK 0x3F #define S2MPA01_BUCK_VSEL_MASK 0xFF #define S2MPA01_ENABLE_MASK (0x03 << S2MPA01_ENABLE_SHIFT) diff --git a/include/linux/mfd/samsung/s2mps11.h b/include/linux/mfd/samsung/s2mps11.h index b3ddf98dec37..7981a9d77d3f 100644 --- a/include/linux/mfd/samsung/s2mps11.h +++ b/include/linux/mfd/samsung/s2mps11.h @@ -171,15 +171,6 @@ enum s2mps11_regulators { S2MPS11_REGULATOR_MAX, }; -#define S2MPS11_BUCK_MIN1 600000 -#define S2MPS11_BUCK_MIN2 750000 -#define S2MPS11_BUCK_MIN3 3000000 -#define S2MPS11_LDO_MIN 800000 -#define S2MPS11_BUCK_STEP1 6250 -#define S2MPS11_BUCK_STEP2 12500 -#define S2MPS11_BUCK_STEP3 25000 -#define S2MPS11_LDO_STEP1 50000 -#define S2MPS11_LDO_STEP2 25000 #define S2MPS11_LDO_VSEL_MASK 0x3F #define S2MPS11_BUCK_VSEL_MASK 0xFF #define S2MPS11_ENABLE_MASK (0x03 << S2MPS11_ENABLE_SHIFT) diff --git a/include/linux/mfd/samsung/s2mps14.h b/include/linux/mfd/samsung/s2mps14.h index 900cd7a04314..c92f4782afb5 100644 --- a/include/linux/mfd/samsung/s2mps14.h +++ b/include/linux/mfd/samsung/s2mps14.h @@ -123,10 +123,6 @@ enum s2mps14_regulators { }; /* Regulator constraints for BUCKx */ -#define S2MPS14_BUCK1235_MIN_600MV 600000 -#define S2MPS14_BUCK4_MIN_1400MV 1400000 -#define S2MPS14_BUCK1235_STEP_6_25MV 6250 -#define S2MPS14_BUCK4_STEP_12_5MV 12500 #define S2MPS14_BUCK1235_START_SEL 0x20 #define S2MPS14_BUCK4_START_SEL 0x40 /* @@ -136,12 +132,6 @@ enum s2mps14_regulators { */ #define S2MPS14_BUCK_RAMP_DELAY 12500 -/* Regulator constraints for different types of LDOx */ -#define S2MPS14_LDO_MIN_800MV 800000 -#define S2MPS14_LDO_MIN_1800MV 1800000 -#define S2MPS14_LDO_STEP_12_5MV 12500 -#define S2MPS14_LDO_STEP_25MV 25000 - #define S2MPS14_LDO_VSEL_MASK 0x3F #define S2MPS14_BUCK_VSEL_MASK 0xFF #define S2MPS14_ENABLE_MASK (0x03 << S2MPS14_ENABLE_SHIFT) -- cgit v1.2.3 From 272e2315fac3bfca0edfa3252b8a643c425602af Mon Sep 17 00:00:00 2001 From: Guodong Xu Date: Wed, 13 Aug 2014 19:33:38 +0800 Subject: regulator: core: add const qualifier to ops in struct regulator_desc struct regulator_ops *ops is a member in struct regulator_desc, which gets its value from individual regulator driver upon regulator_register() and is used by regulator core APIs. 
It's not allowed for regulator core to modify any of these callbacks in *ops. Add 'const' qualifier to enforce that. Signed-off-by: Guodong Xu Signed-off-by: Mark Brown --- drivers/regulator/core.c | 24 ++++++++++++------------ include/linux/regulator/driver.h | 2 +- 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index a3c3785901f5..052e7f1f011d 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -839,7 +839,7 @@ static void print_constraints(struct regulator_dev *rdev) static int machine_constraints_voltage(struct regulator_dev *rdev, struct regulation_constraints *constraints) { - struct regulator_ops *ops = rdev->desc->ops; + const struct regulator_ops *ops = rdev->desc->ops; int ret; /* do we need to apply the constraint voltage */ @@ -938,7 +938,7 @@ static int machine_constraints_voltage(struct regulator_dev *rdev, static int machine_constraints_current(struct regulator_dev *rdev, struct regulation_constraints *constraints) { - struct regulator_ops *ops = rdev->desc->ops; + const struct regulator_ops *ops = rdev->desc->ops; int ret; if (!constraints->min_uA && !constraints->max_uA) @@ -982,7 +982,7 @@ static int set_machine_constraints(struct regulator_dev *rdev, const struct regulation_constraints *constraints) { int ret = 0; - struct regulator_ops *ops = rdev->desc->ops; + const struct regulator_ops *ops = rdev->desc->ops; if (constraints) rdev->constraints = kmemdup(constraints, sizeof(*constraints), @@ -2208,9 +2208,9 @@ EXPORT_SYMBOL_GPL(regulator_count_voltages); */ int regulator_list_voltage(struct regulator *regulator, unsigned selector) { - struct regulator_dev *rdev = regulator->rdev; - struct regulator_ops *ops = rdev->desc->ops; - int ret; + struct regulator_dev *rdev = regulator->rdev; + const struct regulator_ops *ops = rdev->desc->ops; + int ret; if (rdev->desc->fixed_uV && rdev->desc->n_voltages == 1 && !selector) return rdev->desc->fixed_uV; @@ -2572,8 +2572,8 @@ EXPORT_SYMBOL_GPL(regulator_set_voltage); int regulator_set_voltage_time(struct regulator *regulator, int old_uV, int new_uV) { - struct regulator_dev *rdev = regulator->rdev; - struct regulator_ops *ops = rdev->desc->ops; + struct regulator_dev *rdev = regulator->rdev; + const struct regulator_ops *ops = rdev->desc->ops; int old_sel = -1; int new_sel = -1; int voltage; @@ -3336,9 +3336,9 @@ EXPORT_SYMBOL_GPL(regulator_mode_to_status); */ static int add_regulator_attributes(struct regulator_dev *rdev) { - struct device *dev = &rdev->dev; - struct regulator_ops *ops = rdev->desc->ops; - int status = 0; + struct device *dev = &rdev->dev; + const struct regulator_ops *ops = rdev->desc->ops; + int status = 0; /* some attributes need specific methods to be displayed */ if ((ops->get_voltage && ops->get_voltage(rdev) >= 0) || @@ -3905,7 +3905,7 @@ core_initcall(regulator_init); static int __init regulator_init_complete(void) { struct regulator_dev *rdev; - struct regulator_ops *ops; + const struct regulator_ops *ops; struct regulation_constraints *c; int enabled, ret; diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index bbe03a1924c0..4b628139a9cb 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -245,7 +245,7 @@ struct regulator_desc { int id; bool continuous_voltage_range; unsigned n_voltages; - struct regulator_ops *ops; + const struct regulator_ops *ops; int irq; enum regulator_type type; struct module *owner; -- cgit v1.2.3 
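For illustration, a minimal sketch of the driver-side pattern this change enforces; the "foo" names and register layout are hypothetical, while the callbacks are the stock regulator helpers:

static const struct regulator_ops foo_buck_ops = {
	/* generic helpers for a linear voltage range */
	.list_voltage		= regulator_list_voltage_linear,
	.map_voltage		= regulator_map_voltage_linear,
	.get_voltage_sel	= regulator_get_voltage_sel_regmap,
	.set_voltage_sel	= regulator_set_voltage_sel_regmap,
};

static const struct regulator_desc foo_buck_desc = {
	.name		= "foo-buck",
	.id		= 0,
	.ops		= &foo_buck_ops,	/* pointer to const ops table */
	.type		= REGULATOR_VOLTAGE,
	.owner		= THIS_MODULE,
	.min_uV		= 600000,		/* hypothetical range */
	.uV_step	= 6250,
	.n_voltages	= 64,
	.vsel_reg	= 0x10,			/* hypothetical register */
	.vsel_mask	= 0x3f,
};

With the const qualifier in place, any write through rdev->desc->ops in the core fails to compile, and a driver's ops table such as the one above can be placed in .rodata.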
From 871f565055ed232e5751da18a331b73e8254adaf Mon Sep 17 00:00:00 2001 From: Guodong Xu Date: Wed, 13 Aug 2014 19:33:40 +0800 Subject: regulator: core: add guard delay between calling regulator_disable and _enable Some regulators require a minimum delay between a disable and the next enable. This avoids the damage that overly frequent disable/enable cycling of a single regulator can cause to the regulator chip. Add @off_on_delay to struct regulator_desc. Device drivers can use this field to set this guard time. Add @last_off_jiffy to struct regulator_dev. When @off_on_delay is set by the driver, the regulator core can store its last off (disable) time into this field. Signed-off-by: Guodong Xu Signed-off-by: Mark Brown --- drivers/regulator/core.c | 31 +++++++++++++++++++++++++++++++ include/linux/regulator/driver.h | 6 ++++++ 2 files changed, 37 insertions(+) (limited to 'include/linux') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index dc0e9813b62d..1e976b6320a2 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -1813,6 +1813,31 @@ static int _regulator_do_enable(struct regulator_dev *rdev) trace_regulator_enable(rdev_get_name(rdev)); + if (rdev->desc->off_on_delay) { + /* if needed, keep a distance of off_on_delay from the last time + * this regulator was disabled. + */ + unsigned long start_jiffy = jiffies; + unsigned long intended, max_delay, remaining; + + max_delay = usecs_to_jiffies(rdev->desc->off_on_delay); + intended = rdev->last_off_jiffy + max_delay; + + if (time_before(start_jiffy, intended)) { + /* calc remaining jiffies to deal with one-time + * timer wrapping. + * in case of multiple timer wrapping, either it can be + * detected by out-of-range remaining, or it cannot be + * detected and we get a penalty of + * _regulator_enable_delay(). + */ + remaining = intended - start_jiffy; + if (remaining <= max_delay) + _regulator_enable_delay( + jiffies_to_usecs(remaining)); + } + } + if (rdev->ena_pin) { ret = regulator_ena_gpio_ctrl(rdev, true); if (ret < 0) @@ -1925,6 +1950,12 @@ static int _regulator_do_disable(struct regulator_dev *rdev) return ret; } + /* care about last_off_jiffy only if off_on_delay is required by + * the device. + */ + if (rdev->desc->off_on_delay) + rdev->last_off_jiffy = jiffies; + trace_regulator_disable_complete(rdev_get_name(rdev)); return 0; diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 4b628139a9cb..efe058f8f746 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -238,6 +238,7 @@ enum regulator_type { * @bypass_val_off: Disabling value for control when using regmap set_bypass * * @enable_time: Time taken for initial enable of regulator (in uS). + * @off_on_delay: guard time (in uS) before re-enabling a regulator */ struct regulator_desc { const char *name; @@ -276,6 +277,8 @@ struct regulator_desc { unsigned int bypass_val_off; unsigned int enable_time; + + unsigned int off_on_delay; }; /** @@ -348,6 +351,9 @@ struct regulator_dev { struct regulator_enable_gpio *ena_pin; unsigned int ena_gpio_state:1; + + /* time when this regulator was last disabled */ + unsigned long last_off_jiffy; }; struct regulator_dev * -- cgit v1.2.3 From 983c684466e02b21f83c025ea539deee6c0aeac0 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 3 Aug 2014 13:03:10 -0400 Subject: SUNRPC: get rid of the request wait queue We're always _only_ waking up tasks from within the sp_threads list, so we know that they are enqueued and alive.
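(A hedged usage sketch for the off_on_delay patch above — the descriptor below is hypothetical, not taken from any in-tree driver:

static const struct regulator_desc foo_ldo_desc = {
	.name		= "foo-ldo",
	.ops		= &foo_ldo_ops,		/* assumed defined elsewhere */
	.type		= REGULATOR_VOLTAGE,
	.owner		= THIS_MODULE,
	.enable_time	= 500,			/* uS until the output is stable */
	.off_on_delay	= 5000,			/* uS the rail must stay off */
};

With .off_on_delay set, a regulator_enable() issued shortly after a regulator_disable() blocks in _regulator_do_enable() until at least 5 ms have passed since the disable was recorded in last_off_jiffy.)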
The rq_wait waitqueue is just a distraction with extra atomic semantics. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 1 - net/sunrpc/svc.c | 2 -- net/sunrpc/svc_xprt.c | 32 +++++++++++++++++--------------- 3 files changed, 17 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index cf61ecd148e0..21678464883a 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -280,7 +280,6 @@ struct svc_rqst { bool rq_splice_ok; /* turned off in gss privacy * to prevent encrypting page * cache pages */ - wait_queue_head_t rq_wait; /* synchronization */ struct task_struct *rq_task; /* service thread */ }; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 1db5007ddbce..ca8a7958f4e6 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -612,8 +612,6 @@ svc_prepare_thread(struct svc_serv *serv, struct svc_pool *pool, int node) if (!rqstp) goto out_enomem; - init_waitqueue_head(&rqstp->rq_wait); - serv->sv_nrthreads++; spin_lock_bh(&pool->sp_lock); pool->sp_nrthreads++; diff --git a/net/sunrpc/svc_xprt.c b/net/sunrpc/svc_xprt.c index 08e49d1e17b3..faaf2b46273b 100644 --- a/net/sunrpc/svc_xprt.c +++ b/net/sunrpc/svc_xprt.c @@ -348,8 +348,6 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt) cpu = get_cpu(); pool = svc_pool_for_cpu(xprt->xpt_server, cpu); - put_cpu(); - spin_lock_bh(&pool->sp_lock); if (!list_empty(&pool->sp_threads) && @@ -382,10 +380,15 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt) printk(KERN_ERR "svc_xprt_enqueue: server %p, rq_xprt=%p!\n", rqstp, rqstp->rq_xprt); - rqstp->rq_xprt = xprt; + /* Note the order of the following 3 lines: + * We want to assign xprt to rqstp->rq_xprt only _after_ + * we've woken up the process, so that we don't race with + * the lockless check in svc_get_next_xprt(). + */ svc_xprt_get(xprt); + wake_up_process(rqstp->rq_task); + rqstp->rq_xprt = xprt; pool->sp_stats.threads_woken++; - wake_up(&rqstp->rq_wait); } else { dprintk("svc: transport %p put into queue\n", xprt); list_add_tail(&xprt->xpt_ready, &pool->sp_sockets); @@ -394,6 +397,7 @@ static void svc_xprt_do_enqueue(struct svc_xprt *xprt) out_unlock: spin_unlock_bh(&pool->sp_lock); + put_cpu(); } /* @@ -509,7 +513,7 @@ void svc_wake_up(struct svc_serv *serv) svc_thread_dequeue(pool, rqstp); rqstp->rq_xprt = NULL; */ - wake_up(&rqstp->rq_wait); + wake_up_process(rqstp->rq_task); } else pool->sp_task_pending = 1; spin_unlock_bh(&pool->sp_lock); @@ -628,7 +632,6 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) { struct svc_xprt *xprt; struct svc_pool *pool = rqstp->rq_pool; - DECLARE_WAITQUEUE(wait, current); long time_left; /* Normally we will wait up to 5 seconds for any required @@ -654,15 +657,15 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) xprt = ERR_PTR(-EAGAIN); goto out; } - /* No data pending. Go to sleep */ - svc_thread_enqueue(pool, rqstp); - /* * We have to be able to interrupt this wait * to bring down the daemons ... */ set_current_state(TASK_INTERRUPTIBLE); + /* No data pending. 
Go to sleep */ + svc_thread_enqueue(pool, rqstp); + /* * checking kthread_should_stop() here allows us to avoid * locking and signalling when stopping kthreads that call @@ -676,14 +679,13 @@ static struct svc_xprt *svc_get_next_xprt(struct svc_rqst *rqstp, long timeout) goto out; } - add_wait_queue(&rqstp->rq_wait, &wait); spin_unlock_bh(&pool->sp_lock); time_left = schedule_timeout(timeout); + __set_current_state(TASK_RUNNING); try_to_freeze(); - remove_wait_queue(&rqstp->rq_wait, &wait); xprt = rqstp->rq_xprt; if (xprt != NULL) return xprt; @@ -786,10 +788,10 @@ int svc_recv(struct svc_rqst *rqstp, long timeout) printk(KERN_ERR "svc_recv: service %p, transport not NULL!\n", rqstp); - if (waitqueue_active(&rqstp->rq_wait)) - printk(KERN_ERR - "svc_recv: service %p, wait queue active!\n", - rqstp); + + /* Make sure the task pointer is set! */ + if (WARN_ON_ONCE(!rqstp->rq_task)) + rqstp->rq_task = current_task; err = svc_alloc_arg(rqstp); if (err) -- cgit v1.2.3 From 716845ebeb505353d900320b4a74e8330520410d Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Mon, 18 Aug 2014 10:34:08 +0800 Subject: regulator: core: Fix build error due to const qualifier for ops Drop const qualifier for ops of struct regulator_desc. Allow regulator drivers to update ops before registering regulator. Fix below build error: CC [M] drivers/regulator/mc13892-regulator.o drivers/regulator/mc13892-regulator.c: In function 'mc13892_regulator_probe': drivers/regulator/mc13892-regulator.c:586:3: error: assignment of member 'set_mode' in read-only object drivers/regulator/mc13892-regulator.c:588:3: error: assignment of member 'get_mode' in read-only object make[2]: *** [drivers/regulator/mc13892-regulator.o] Error 1 make[1]: *** [drivers/regulator] Error 2 make: *** [drivers] Error 2 Reported-by: Stephen Rothwell Signed-off-by: Axel Lin Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index efe058f8f746..3abda7554d82 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -246,7 +246,7 @@ struct regulator_desc { int id; bool continuous_voltage_range; unsigned n_voltages; - const struct regulator_ops *ops; + struct regulator_ops *ops; int irq; enum regulator_type type; struct module *owner; -- cgit v1.2.3 From e5f81539f657af7e9f54ea37986fde8f92acef22 Mon Sep 17 00:00:00 2001 From: Feng Kan Date: Wed, 30 Jul 2014 14:56:58 -0700 Subject: irqchip: gic: Replace hex numbers with defines. This is to cleanup some hex numbers used in the code and replace them with defines to make the code cleaner. Signed-off-by: Feng Kan Reviewed-by: Anup Patel Link: https://lkml.kernel.org/r/1406757419-18729-2-git-send-email-fkan@apm.com Signed-off-by: Jason Cooper --- drivers/irqchip/irq-gic-common.c | 15 +++++++++------ drivers/irqchip/irq-gic.c | 25 +++++++++++++------------ include/linux/irqchip/arm-gic.h | 15 +++++++++++++++ 3 files changed, 37 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/irqchip/irq-gic-common.c b/drivers/irqchip/irq-gic-common.c index 60ac704d2090..61541ff24397 100644 --- a/drivers/irqchip/irq-gic-common.c +++ b/drivers/irqchip/irq-gic-common.c @@ -74,20 +74,22 @@ void __init gic_dist_config(void __iomem *base, int gic_irqs, * Set all global interrupts to be level triggered, active low. 
*/ for (i = 32; i < gic_irqs; i += 16) - writel_relaxed(0, base + GIC_DIST_CONFIG + i / 4); + writel_relaxed(GICD_INT_ACTLOW_LVLTRIG, + base + GIC_DIST_CONFIG + i / 4); /* * Set priority on all global interrupts. */ for (i = 32; i < gic_irqs; i += 4) - writel_relaxed(0xa0a0a0a0, base + GIC_DIST_PRI + i); + writel_relaxed(GICD_INT_DEF_PRI_X4, base + GIC_DIST_PRI + i); /* * Disable all interrupts. Leave the PPI and SGIs alone * as they are enabled by redistributor registers. */ for (i = 32; i < gic_irqs; i += 32) - writel_relaxed(0xffffffff, base + GIC_DIST_ENABLE_CLEAR + i / 8); + writel_relaxed(GICD_INT_EN_CLR_X32, + base + GIC_DIST_ENABLE_CLEAR + i / 8); if (sync_access) sync_access(); @@ -101,14 +103,15 @@ void gic_cpu_config(void __iomem *base, void (*sync_access)(void)) * Deal with the banked PPI and SGI interrupts - disable all * PPI interrupts, ensure all SGI interrupts are enabled. */ - writel_relaxed(0xffff0000, base + GIC_DIST_ENABLE_CLEAR); - writel_relaxed(0x0000ffff, base + GIC_DIST_ENABLE_SET); + writel_relaxed(GICD_INT_EN_CLR_PPI, base + GIC_DIST_ENABLE_CLEAR); + writel_relaxed(GICD_INT_EN_SET_SGI, base + GIC_DIST_ENABLE_SET); /* * Set priority on PPI and SGI interrupts */ for (i = 0; i < 32; i += 4) - writel_relaxed(0xa0a0a0a0, base + GIC_DIST_PRI + i * 4 / 4); + writel_relaxed(GICD_INT_DEF_PRI_X4, + base + GIC_DIST_PRI + i * 4 / 4); if (sync_access) sync_access(); diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 4b959e606fe8..35847453cecb 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -298,8 +298,8 @@ static void gic_handle_cascade_irq(unsigned int irq, struct irq_desc *desc) status = readl_relaxed(gic_data_cpu_base(chip_data) + GIC_CPU_INTACK); raw_spin_unlock(&irq_controller_lock); - gic_irq = (status & 0x3ff); - if (gic_irq == 1023) + gic_irq = (status & GICC_IAR_INT_ID_MASK); + if (gic_irq == GICC_INT_SPURIOUS) goto out; cascade_irq = irq_find_mapping(chip_data->domain, gic_irq); @@ -360,7 +360,7 @@ static void __init gic_dist_init(struct gic_chip_data *gic) unsigned int gic_irqs = gic->gic_irqs; void __iomem *base = gic_data_dist_base(gic); - writel_relaxed(0, base + GIC_DIST_CTRL); + writel_relaxed(GICD_DISABLE, base + GIC_DIST_CTRL); /* * Set all global interrupts to this CPU only. 
@@ -373,7 +373,7 @@ static void __init gic_dist_init(struct gic_chip_data *gic) gic_dist_config(base, gic_irqs, NULL); - writel_relaxed(1, base + GIC_DIST_CTRL); + writel_relaxed(GICD_ENABLE, base + GIC_DIST_CTRL); } static void gic_cpu_init(struct gic_chip_data *gic) @@ -400,8 +400,8 @@ static void gic_cpu_init(struct gic_chip_data *gic) gic_cpu_config(dist_base, NULL); - writel_relaxed(0xf0, base + GIC_CPU_PRIMASK); - writel_relaxed(1, base + GIC_CPU_CTRL); + writel_relaxed(GICC_INT_PRI_THRESHOLD, base + GIC_CPU_PRIMASK); + writel_relaxed(GICC_ENABLE, base + GIC_CPU_CTRL); } void gic_cpu_if_down(void) @@ -467,14 +467,14 @@ static void gic_dist_restore(unsigned int gic_nr) if (!dist_base) return; - writel_relaxed(0, dist_base + GIC_DIST_CTRL); + writel_relaxed(GICD_DISABLE, dist_base + GIC_DIST_CTRL); for (i = 0; i < DIV_ROUND_UP(gic_irqs, 16); i++) writel_relaxed(gic_data[gic_nr].saved_spi_conf[i], dist_base + GIC_DIST_CONFIG + i * 4); for (i = 0; i < DIV_ROUND_UP(gic_irqs, 4); i++) - writel_relaxed(0xa0a0a0a0, + writel_relaxed(GICD_INT_DEF_PRI_X4, dist_base + GIC_DIST_PRI + i * 4); for (i = 0; i < DIV_ROUND_UP(gic_irqs, 4); i++) @@ -485,7 +485,7 @@ static void gic_dist_restore(unsigned int gic_nr) writel_relaxed(gic_data[gic_nr].saved_spi_enable[i], dist_base + GIC_DIST_ENABLE_SET + i * 4); - writel_relaxed(1, dist_base + GIC_DIST_CTRL); + writel_relaxed(GICD_ENABLE, dist_base + GIC_DIST_CTRL); } static void gic_cpu_save(unsigned int gic_nr) @@ -539,10 +539,11 @@ static void gic_cpu_restore(unsigned int gic_nr) writel_relaxed(ptr[i], dist_base + GIC_DIST_CONFIG + i * 4); for (i = 0; i < DIV_ROUND_UP(32, 4); i++) - writel_relaxed(0xa0a0a0a0, dist_base + GIC_DIST_PRI + i * 4); + writel_relaxed(GICD_INT_DEF_PRI_X4, + dist_base + GIC_DIST_PRI + i * 4); - writel_relaxed(0xf0, cpu_base + GIC_CPU_PRIMASK); - writel_relaxed(1, cpu_base + GIC_CPU_CTRL); + writel_relaxed(GICC_INT_PRI_THRESHOLD, cpu_base + GIC_CPU_PRIMASK); + writel_relaxed(GICC_ENABLE, cpu_base + GIC_CPU_CTRL); } static int gic_notifier(struct notifier_block *self, unsigned long cmd, void *v) diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 45e2d8c15bd2..5cb9d41af5be 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -21,7 +21,10 @@ #define GIC_CPU_ACTIVEPRIO 0xd0 #define GIC_CPU_IDENT 0xfc +#define GICC_ENABLE 0x1 +#define GICC_INT_PRI_THRESHOLD 0xf0 #define GICC_IAR_INT_ID_MASK 0x3ff +#define GICC_INT_SPURIOUS 1023 #define GIC_DIST_CTRL 0x000 #define GIC_DIST_CTR 0x004 @@ -39,6 +42,18 @@ #define GIC_DIST_SGI_PENDING_CLEAR 0xf10 #define GIC_DIST_SGI_PENDING_SET 0xf20 +#define GICD_ENABLE 0x1 +#define GICD_DISABLE 0x0 +#define GICD_INT_ACTLOW_LVLTRIG 0x0 +#define GICD_INT_EN_CLR_X32 0xffffffff +#define GICD_INT_EN_SET_SGI 0x0000ffff +#define GICD_INT_EN_CLR_PPI 0xffff0000 +#define GICD_INT_DEF_PRI 0xa0 +#define GICD_INT_DEF_PRI_X4 ((GICD_INT_DEF_PRI << 24) |\ + (GICD_INT_DEF_PRI << 16) |\ + (GICD_INT_DEF_PRI << 8) |\ + GICD_INT_DEF_PRI) + #define GICH_HCR 0x0 #define GICH_VTR 0x4 #define GICH_VMCR 0x8 -- cgit v1.2.3 From 3228950621d92f0f212378f95a6998ef3a1be0bb Mon Sep 17 00:00:00 2001 From: Feng Kan Date: Wed, 30 Jul 2014 14:56:59 -0700 Subject: irqchip: gic: Preserve gic V2 bypass bits in cpu ctrl register This change is made to preserve the GIC v2 bypass bits in the GIC_CPU_CTRL register (also known as the GICC_CTLR register in spec). This code will preserve all bits configured by the bootloader regarding v2 bypass group bits. 
In the X-Gene platform, the bypass functionality is not used and bypass bits should not be changed by the kernel gic code as it could lead to incorrect behavior. Signed-off-by: Feng Kan Reviewed-by: Vinayak Kale Reviewed-by: Anup Patel Link: https://lkml.kernel.org/r/1406757419-18729-3-git-send-email-fkan@apm.com Signed-off-by: Jason Cooper --- drivers/irqchip/irq-gic.c | 25 ++++++++++++++++++++++--- include/linux/irqchip/arm-gic.h | 1 + 2 files changed, 23 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/irqchip/irq-gic.c b/drivers/irqchip/irq-gic.c index 35847453cecb..2500f6ba29e1 100644 --- a/drivers/irqchip/irq-gic.c +++ b/drivers/irqchip/irq-gic.c @@ -353,6 +353,21 @@ static u8 gic_get_cpumask(struct gic_chip_data *gic) return mask; } +static void gic_cpu_if_up(void) +{ + void __iomem *cpu_base = gic_data_cpu_base(&gic_data[0]); + u32 bypass = 0; + + /* + * Preserve bypass disable bits to be written back later + */ + bypass = readl(cpu_base + GIC_CPU_CTRL); + bypass &= GICC_DIS_BYPASS_MASK; + + writel_relaxed(bypass | GICC_ENABLE, cpu_base + GIC_CPU_CTRL); +} + + static void __init gic_dist_init(struct gic_chip_data *gic) { unsigned int i; @@ -401,13 +416,17 @@ static void gic_cpu_init(struct gic_chip_data *gic) gic_cpu_config(dist_base, NULL); writel_relaxed(GICC_INT_PRI_THRESHOLD, base + GIC_CPU_PRIMASK); - writel_relaxed(GICC_ENABLE, base + GIC_CPU_CTRL); + gic_cpu_if_up(); } void gic_cpu_if_down(void) { void __iomem *cpu_base = gic_data_cpu_base(&gic_data[0]); - writel_relaxed(0, cpu_base + GIC_CPU_CTRL); + u32 val = 0; + + val = readl(cpu_base + GIC_CPU_CTRL); + val &= ~GICC_ENABLE; + writel_relaxed(val, cpu_base + GIC_CPU_CTRL); } #ifdef CONFIG_CPU_PM @@ -543,7 +562,7 @@ static void gic_cpu_restore(unsigned int gic_nr) dist_base + GIC_DIST_PRI + i * 4); writel_relaxed(GICC_INT_PRI_THRESHOLD, cpu_base + GIC_CPU_PRIMASK); - writel_relaxed(GICC_ENABLE, cpu_base + GIC_CPU_CTRL); + gic_cpu_if_up(); } static int gic_notifier(struct notifier_block *self, unsigned long cmd, void *v) diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index 5cb9d41af5be..13eed92c7d24 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -25,6 +25,7 @@ #define GICC_INT_PRI_THRESHOLD 0xf0 #define GICC_IAR_INT_ID_MASK 0x3ff #define GICC_INT_SPURIOUS 1023 +#define GICC_DIS_BYPASS_MASK 0x1e0 #define GIC_DIST_CTRL 0x000 #define GIC_DIST_CTR 0x004 -- cgit v1.2.3 From 7b7d8982f0169d5ac67c6c2877449fb7f6968cac Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 27 Jul 2014 14:31:53 -0700 Subject: mtd: fix linux/mtd/nand.h kernel-doc warning Fix kernel-doc warning in : Warning(..//include/linux/mtd/nand.h:795): No description found for parameter 'ecc' Signed-off-by: Randy Dunlap Cc: David Woodhouse Cc: Brian Norris Cc: linux-mtd@lists.infradead.org Signed-off-by: Brian Norris --- include/linux/mtd/nand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 3083c53e0270..b7c11991cb09 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -766,6 +766,7 @@ struct nand_chip { * @options: stores various chip bit options * @id_len: The valid length of the @id. * @oobsize: OOB size + * @ecc: ECC correctability and step information from the datasheet. * @ecc.strength_ds: The ECC correctability from the datasheet, same as the * @ecc_strength_ds in nand_chip{}. 
* @ecc.step_ds: The ECC step required by the @ecc.strength_ds, same as the -- cgit v1.2.3 From 31f754628cbb12c983600f22d9f0fed50dfe2134 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 21 Jul 2014 19:07:22 -0700 Subject: mtd: use __packed shorthand Signed-off-by: Brian Norris --- drivers/mtd/mtdswap.c | 2 +- drivers/mtd/nand/sm_common.h | 2 +- include/linux/mtd/cfi.h | 22 +++++++++++----------- 3 files changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/drivers/mtd/mtdswap.c b/drivers/mtd/mtdswap.c index 48cf6f98df44..fc8b3d16cce7 100644 --- a/drivers/mtd/mtdswap.c +++ b/drivers/mtd/mtdswap.c @@ -145,7 +145,7 @@ struct mtdswap_dev { struct mtdswap_oobdata { __le16 magic; __le32 count; -} __attribute__((packed)); +} __packed; #define MTDSWAP_MAGIC_CLEAN 0x2095 #define MTDSWAP_MAGIC_DIRTY (MTDSWAP_MAGIC_CLEAN + 1) diff --git a/drivers/mtd/nand/sm_common.h b/drivers/mtd/nand/sm_common.h index 00f4a83359b2..d3e028e58b0f 100644 --- a/drivers/mtd/nand/sm_common.h +++ b/drivers/mtd/nand/sm_common.h @@ -18,7 +18,7 @@ struct sm_oob { uint8_t ecc2[3]; uint8_t lba_copy2[2]; uint8_t ecc1[3]; -} __attribute__((packed)); +} __packed; /* one sector is always 512 bytes, but it can consist of two nand pages */ diff --git a/include/linux/mtd/cfi.h b/include/linux/mtd/cfi.h index 37ef6b194089..299d7d31fe53 100644 --- a/include/linux/mtd/cfi.h +++ b/include/linux/mtd/cfi.h @@ -153,7 +153,7 @@ struct cfi_ident { uint16_t MaxBufWriteSize; uint8_t NumEraseRegions; uint32_t EraseRegionInfo[0]; /* Not host ordered */ -} __attribute__((packed)); +} __packed; /* Extended Query Structure for both PRI and ALT */ @@ -161,7 +161,7 @@ struct cfi_extquery { uint8_t pri[3]; uint8_t MajorVersion; uint8_t MinorVersion; -} __attribute__((packed)); +} __packed; /* Vendor-Specific PRI for Intel/Sharp Extended Command Set (0x0001) */ @@ -180,7 +180,7 @@ struct cfi_pri_intelext { uint8_t FactProtRegSize; uint8_t UserProtRegSize; uint8_t extra[0]; -} __attribute__((packed)); +} __packed; struct cfi_intelext_otpinfo { uint32_t ProtRegAddr; @@ -188,7 +188,7 @@ struct cfi_intelext_otpinfo { uint8_t FactProtRegSize; uint16_t UserGroups; uint8_t UserProtRegSize; -} __attribute__((packed)); +} __packed; struct cfi_intelext_blockinfo { uint16_t NumIdentBlocks; @@ -196,7 +196,7 @@ struct cfi_intelext_blockinfo { uint16_t MinBlockEraseCycles; uint8_t BitsPerCell; uint8_t BlockCap; -} __attribute__((packed)); +} __packed; struct cfi_intelext_regioninfo { uint16_t NumIdentPartitions; @@ -205,7 +205,7 @@ struct cfi_intelext_regioninfo { uint8_t NumOpAllowedSimEraMode; uint8_t NumBlockTypes; struct cfi_intelext_blockinfo BlockTypes[1]; -} __attribute__((packed)); +} __packed; struct cfi_intelext_programming_regioninfo { uint8_t ProgRegShift; @@ -214,7 +214,7 @@ struct cfi_intelext_programming_regioninfo { uint8_t Reserved2; uint8_t ControlInvalid; uint8_t Reserved3; -} __attribute__((packed)); +} __packed; /* Vendor-Specific PRI for AMD/Fujitsu Extended Command Set (0x0002) */ @@ -233,7 +233,7 @@ struct cfi_pri_amdstd { uint8_t VppMin; uint8_t VppMax; uint8_t TopBottom; -} __attribute__((packed)); +} __packed; /* Vendor-Specific PRI for Atmel chips (command set 0x0002) */ @@ -245,18 +245,18 @@ struct cfi_pri_atmel { uint8_t BottomBoot; uint8_t BurstMode; uint8_t PageMode; -} __attribute__((packed)); +} __packed; struct cfi_pri_query { uint8_t NumFields; uint32_t ProtField[1]; /* Not host ordered */ -} __attribute__((packed)); +} __packed; struct cfi_bri_query { uint8_t PageModeReadCap; uint8_t NumFields; 
uint32_t ConfField[1]; /* Not host ordered */ -} __attribute__((packed)); +} __packed; #define P_ID_NONE 0x0000 #define P_ID_INTEL_EXT 0x0001 -- cgit v1.2.3 From df11e506d330d9a0e5a701cd2c5fcb7d461b6060 Mon Sep 17 00:00:00 2001 From: Axel Lin Date: Thu, 21 Aug 2014 10:11:34 +0800 Subject: regulator: core: Add back the const qualifier for ops of struct regulator_desc Fix below build warning: CC [M] drivers/regulator/hi6421-regulator.o drivers/regulator/hi6421-regulator.c:356:2: warning: initialization discards 'const' qualifier from pointer target type [enabled by default] This is a revert of commit 716845ebeb50 ("regulator: core: Fix build error due to const qualifier for ops"). The build error was fixed by commit 39f5460d7f9c ("regulator: core: add const to regulator_ops and fix build error in mc13892"). Signed-off-by: Axel Lin Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 3abda7554d82..efe058f8f746 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -246,7 +246,7 @@ struct regulator_desc { int id; bool continuous_voltage_range; unsigned n_voltages; - struct regulator_ops *ops; + const struct regulator_ops *ops; int irq; enum regulator_type type; struct module *owner; -- cgit v1.2.3 From e790d9ef6405633b007339d746b709aed43a928d Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Thu, 21 Aug 2014 18:08:05 +0200 Subject: KVM: add kvm_arch_sched_in MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Introduce preempt notifiers for architecture specific code. Advantage over creating a new notifier in every arch is slightly simpler code and guaranteed call order with respect to kvm_sched_in. 
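(For illustration only — this patch itself adds empty stubs. An architecture wanting per-reschedule work could later fill in its stub along these lines; the per-vcpu field is hypothetical:

void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu)
{
	/* called from the preempt notifier, before kvm_arch_vcpu_load() */
	vcpu->arch.sched_in_count++;	/* hypothetical per-vcpu state */
}

Because kvm_sched_in() invokes this hook first, such state is guaranteed to be updated before the vcpu is loaded on the new CPU.)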
Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/arm/kvm/arm.c | 4 ++++ arch/mips/kvm/mips.c | 4 ++++ arch/powerpc/kvm/powerpc.c | 4 ++++ arch/s390/kvm/kvm-s390.c | 4 ++++ arch/x86/kvm/x86.c | 4 ++++ include/linux/kvm_host.h | 2 ++ virt/kvm/kvm_main.c | 2 ++ 7 files changed, 24 insertions(+) (limited to 'include/linux') diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index a99e0cdf8ba2..9f788ebac55b 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -288,6 +288,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { } +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { vcpu->cpu = cpu; diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index cd7114147ae7..2362df2a79f9 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -1002,6 +1002,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) { } +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + int kvm_arch_vcpu_ioctl_translate(struct kvm_vcpu *vcpu, struct kvm_translation *tr) { diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 4c79284b58be..cbc432f4f0a6 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -720,6 +720,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) kvmppc_subarch_vcpu_uninit(vcpu); } +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { #ifdef CONFIG_BOOKE diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index ce81eb2ab76a..a3c324ec4370 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -555,6 +555,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) /* Nothing todo */ } +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { save_fp_ctl(&vcpu->arch.host_fpregs.fpc); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index cd718c01cdf1..7d43dc7bb906 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7171,6 +7171,10 @@ void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) static_key_slow_dec(&kvm_no_apic_vcpu); } +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu) +{ +} + int kvm_arch_init_vm(struct kvm *kvm, unsigned long type) { if (type) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a4c33b34fe3f..ebd723676633 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -624,6 +624,8 @@ void kvm_arch_exit(void); int kvm_arch_vcpu_init(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu); +void kvm_arch_sched_in(struct kvm_vcpu *vcpu, int cpu); + void kvm_arch_vcpu_free(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_load(struct kvm_vcpu *vcpu, int cpu); void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 39b16035386f..5a0817ee996e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3124,6 +3124,8 @@ static void kvm_sched_in(struct preempt_notifier *pn, int cpu) if (vcpu->preempted) vcpu->preempted = false; + kvm_arch_sched_in(vcpu, cpu); + kvm_arch_vcpu_load(vcpu, cpu); } -- cgit v1.2.3 From 8913dc0bb913ac3dc83ed5c10bac2f4e55431981 Mon Sep 17 00:00:00 2001 From: Paul Zimmerman Date: Thu, 21 Aug 2014 20:28:20 +0000 Subject: usb: gadget: document a usb_ep_dequeue() requirement Document the requirement that the request be dequeued and its completion routine called before usb_ep_dequeue() returns. 
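(A hedged caller-side sketch of what this guarantee permits; the helper is hypothetical, and real drivers often free the request from the completion handler instead:

static void foo_cancel_io(struct usb_ep *ep, struct usb_request *req)
{
	/* On success the completion routine has already run with
	 * status -ECONNRESET, so no completion can arrive later. */
	if (usb_ep_dequeue(ep, req) == 0)
		usb_ep_free_request(ep, req);	/* safe: request is idle */
}
)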
Also fix some capitalization issues in the existing text. Signed-off-by: Paul Zimmerman Acked-by: Alan Stern Signed-off-by: Felipe Balbi --- include/linux/usb/gadget.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index c3a61853cd13..c540557b564b 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -345,12 +345,13 @@ static inline int usb_ep_queue(struct usb_ep *ep, * @ep:the endpoint associated with the request * @req:the request being canceled * - * if the request is still active on the endpoint, it is dequeued and its + * If the request is still active on the endpoint, it is dequeued and its * completion routine is called (with status -ECONNRESET); else a negative - * error code is returned. + * error code is returned. This is guaranteed to happen before the call to + * usb_ep_dequeue() returns. * - * note that some hardware can't clear out write fifos (to unlink the request - * at the head of the queue) except as part of disconnecting from usb. such + * Note that some hardware can't clear out write fifos (to unlink the request + * at the head of the queue) except as part of disconnecting from usb. Such * restrictions prevent drivers from supporting configuration changes, * even to configuration zero (a "chapter 9" requirement). */ -- cgit v1.2.3 From d4261e5650004d6d51137553ea5433d5828562dc Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 19 Aug 2014 16:02:12 +0200 Subject: bonding: create netlink event when bonding option is changed Userspace needs to be notified if one changes some option. Signed-off-by: Jiri Pirko Acked-by: Veaceslav Falico Acked-by: Andy Gospodarek Signed-off-by: David S. Miller --- drivers/net/bonding/bond_options.c | 2 ++ include/linux/netdevice.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index dc73463c2c23..d8dc17faa6b4 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -625,6 +625,8 @@ int __bond_opt_set(struct bonding *bond, out: if (ret) bond_opt_error_interpret(bond, opt, ret, val); + else + call_netdevice_notifiers(NETDEV_CHANGEINFODATA, bond->dev); return ret; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 38377392d082..7e2b0b8b5cd7 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1982,6 +1982,7 @@ struct pcpu_sw_netstats { #define NETDEV_CHANGEUPPER 0x0015 #define NETDEV_RESEND_IGMP 0x0016 #define NETDEV_PRECHANGEMTU 0x0017 /* notify before mtu change happened */ +#define NETDEV_CHANGEINFODATA 0x0018 int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); -- cgit v1.2.3 From 989e04c5bc3ff77d65e1f0d87bf7904dfa30d41c Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Fri, 22 Aug 2014 14:15:22 -0700 Subject: tcp: improve undo on timeout Upon timeout, undo (via both timestamps/Eifel and DSACKs) was disabled if any retransmits were still in flight. The concern was perhaps that spurious retransmission sent in a previous recovery episode may trigger DSACKs to falsely undo the current recovery. However, this inadvertently misses undo opportunities (using either TCP timestamps or DSACKs) when timeout occurs during a loss episode, i.e. recurring timeouts or timeout during fast recovery. 
In these cases some retransmissions will be in flight but we should allow undo. Furthermore, we should only reset undo_marker and undo_retrans upon timeout if we are starting a new recovery episode. Finally, when we do reset our undo state, we now do so in a manner similar to tcp_enter_recovery(), so that we require a DSACK for each of the outstanding retransmissions. This will achieve the original goal by requiring that we receive the same number of DSACKs as retransmissions. This patch increases the undo events by 50% on Google servers. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- net/ipv4/tcp_input.c | 26 +++++++++++--------------- 2 files changed, 12 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index fa5258f322e7..e567f0dbf282 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -276,7 +276,7 @@ struct tcp_sock { u32 retrans_stamp; /* Timestamp of the last retransmit, * also used in SYN-SENT to remember stamp of * the first SYN. */ - u32 undo_marker; /* tracking retrans started here. */ + u32 undo_marker; /* snd_una upon a new recovery episode. */ int undo_retrans; /* number of undoable retransmissions. */ u32 total_retrans; /* Total retransmits for entire connection */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index a906e0200ff2..aba4926ca095 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -1888,21 +1888,21 @@ static inline void tcp_reset_reno_sack(struct tcp_sock *tp) tp->sacked_out = 0; } -static void tcp_clear_retrans_partial(struct tcp_sock *tp) +void tcp_clear_retrans(struct tcp_sock *tp) { tp->retrans_out = 0; tp->lost_out = 0; - tp->undo_marker = 0; tp->undo_retrans = -1; + tp->fackets_out = 0; + tp->sacked_out = 0; } -void tcp_clear_retrans(struct tcp_sock *tp) +static inline void tcp_init_undo(struct tcp_sock *tp) { - tcp_clear_retrans_partial(tp); - - tp->fackets_out = 0; - tp->sacked_out = 0; + tp->undo_marker = tp->snd_una; + /* Retransmission still in flight may cause DSACKs later. */ + tp->undo_retrans = tp->retrans_out ? : -1; } /* Enter Loss state. If we detect SACK reneging, forget all SACK information @@ -1925,18 +1925,18 @@ void tcp_enter_loss(struct sock *sk) tp->prior_ssthresh = tcp_current_ssthresh(sk); tp->snd_ssthresh = icsk->icsk_ca_ops->ssthresh(sk); tcp_ca_event(sk, CA_EVENT_LOSS); + tcp_init_undo(tp); } tp->snd_cwnd = 1; tp->snd_cwnd_cnt = 0; tp->snd_cwnd_stamp = tcp_time_stamp; - tcp_clear_retrans_partial(tp); + tp->retrans_out = 0; + tp->lost_out = 0; if (tcp_is_reno(tp)) tcp_reset_reno_sack(tp); - tp->undo_marker = tp->snd_una; - skb = tcp_write_queue_head(sk); is_reneg = skb && (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED); if (is_reneg) { @@ -1950,9 +1950,6 @@ void tcp_enter_loss(struct sock *sk) if (skb == tcp_send_head(sk)) break; - if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_RETRANS) - tp->undo_marker = 0; - TCP_SKB_CB(skb)->sacked &= (~TCPCB_TAGBITS)|TCPCB_SACKED_ACKED; if (!(TCP_SKB_CB(skb)->sacked&TCPCB_SACKED_ACKED) || is_reneg) { TCP_SKB_CB(skb)->sacked &= ~TCPCB_SACKED_ACKED; @@ -2671,8 +2668,7 @@ static void tcp_enter_recovery(struct sock *sk, bool ece_ack) NET_INC_STATS_BH(sock_net(sk), mib_idx); tp->prior_ssthresh = 0; - tp->undo_marker = tp->snd_una; - tp->undo_retrans = tp->retrans_out ?
: -1; + tcp_init_undo(tp); if (inet_csk(sk)->icsk_ca_state < TCP_CA_CWR) { if (!ece_ack) -- cgit v1.2.3 From 53f3cc46336b9e514c98556b4a009a69ed808d3b Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Sat, 23 Aug 2014 14:45:47 +0400 Subject: pata_platform: Remove useless irq_flags field IRQ flags can be obtained from the resource structure; there is no need for an additional field in the platform_data to store these values. This patch removes this field and converts existing users of this driver to use IRQ flags from the resources. Signed-off-by: Alexander Shiyan Signed-off-by: Tejun Heo --- arch/blackfin/mach-bf537/boards/cm_bf537e.c | 3 +-- arch/blackfin/mach-bf537/boards/cm_bf537u.c | 3 +-- arch/blackfin/mach-bf537/boards/stamp.c | 3 +-- arch/blackfin/mach-bf537/boards/tcm_bf537.c | 3 +-- arch/blackfin/mach-bf561/boards/cm_bf561.c | 3 +-- drivers/ata/pata_of_platform.c | 2 -- drivers/ata/pata_platform.c | 4 +--- include/linux/ata_platform.h | 5 ----- 8 files changed, 6 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/arch/blackfin/mach-bf537/boards/cm_bf537e.c b/arch/blackfin/mach-bf537/boards/cm_bf537e.c index 1e7290ef3525..1e1014df5e9e 100644 --- a/arch/blackfin/mach-bf537/boards/cm_bf537e.c +++ b/arch/blackfin/mach-bf537/boards/cm_bf537e.c @@ -733,7 +733,6 @@ static struct platform_device bfin_mac_device = { static struct pata_platform_info bfin_pata_platform_data = { .ioport_shift = 2, - .irq_type = IRQF_TRIGGER_HIGH, }; static struct resource bfin_pata_resources[] = { @@ -750,7 +749,7 @@ static struct resource bfin_pata_resources[] = { { .start = PATA_INT, .end = PATA_INT, - .flags = IORESOURCE_IRQ, + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL, }, }; diff --git a/arch/blackfin/mach-bf537/boards/cm_bf537u.c b/arch/blackfin/mach-bf537/boards/cm_bf537u.c index c7495dc74690..d056db9e5592 100644 --- a/arch/blackfin/mach-bf537/boards/cm_bf537u.c +++ b/arch/blackfin/mach-bf537/boards/cm_bf537u.c @@ -587,7 +587,6 @@ static struct platform_device bfin_mac_device = { static struct pata_platform_info bfin_pata_platform_data = { .ioport_shift = 2, - .irq_type = IRQF_TRIGGER_HIGH, }; static struct resource bfin_pata_resources[] = { @@ -604,7 +603,7 @@ static struct resource bfin_pata_resources[] = { { .start = PATA_INT, .end = PATA_INT, - .flags = IORESOURCE_IRQ, + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL, }, }; diff --git a/arch/blackfin/mach-bf537/boards/stamp.c b/arch/blackfin/mach-bf537/boards/stamp.c index de19b8a56007..88a19fc9844d 100644 --- a/arch/blackfin/mach-bf537/boards/stamp.c +++ b/arch/blackfin/mach-bf537/boards/stamp.c @@ -2462,7 +2462,6 @@ static struct platform_device bfin_sport0_device = { #define PATA_INT IRQ_PF5 static struct pata_platform_info bfin_pata_platform_data = { .ioport_shift = 1, - .irq_flags = IRQF_TRIGGER_HIGH, }; static struct resource bfin_pata_resources[] = { @@ -2479,7 +2478,7 @@ static struct resource bfin_pata_resources[] = { { .start = PATA_INT, .end = PATA_INT, - .flags = IORESOURCE_IRQ, + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL, }, }; #elif defined(CF_IDE_NAND_CARD_USE_CF_IN_COMMON_MEMORY_MODE) diff --git a/arch/blackfin/mach-bf537/boards/tcm_bf537.c b/arch/blackfin/mach-bf537/boards/tcm_bf537.c index 6b988ad653d8..ed309c9a62b6 100644 --- a/arch/blackfin/mach-bf537/boards/tcm_bf537.c +++ b/arch/blackfin/mach-bf537/boards/tcm_bf537.c @@ -589,7 +589,6 @@ static struct platform_device bfin_mac_device = { static struct pata_platform_info bfin_pata_platform_data = { .ioport_shift = 2, - .irq_type =
IRQF_TRIGGER_HIGH, }; static struct resource bfin_pata_resources[] = { @@ -606,7 +605,7 @@ static struct resource bfin_pata_resources[] = { { .start = PATA_INT, .end = PATA_INT, - .flags = IORESOURCE_IRQ, + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL, }, }; diff --git a/arch/blackfin/mach-bf561/boards/cm_bf561.c b/arch/blackfin/mach-bf561/boards/cm_bf561.c index e862f7823e68..c6db52ba3a06 100644 --- a/arch/blackfin/mach-bf561/boards/cm_bf561.c +++ b/arch/blackfin/mach-bf561/boards/cm_bf561.c @@ -354,7 +354,6 @@ static struct platform_device bfin_sir0_device = { static struct pata_platform_info bfin_pata_platform_data = { .ioport_shift = 2, - .irq_type = IRQF_TRIGGER_HIGH, }; static struct resource bfin_pata_resources[] = { @@ -371,7 +370,7 @@ static struct resource bfin_pata_resources[] = { { .start = PATA_INT, .end = PATA_INT, - .flags = IORESOURCE_IRQ, + .flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL, }, }; diff --git a/drivers/ata/pata_of_platform.c b/drivers/ata/pata_of_platform.c index 6af1c9b9a464..64965398914a 100644 --- a/drivers/ata/pata_of_platform.c +++ b/drivers/ata/pata_of_platform.c @@ -43,8 +43,6 @@ static int pata_of_platform_probe(struct platform_device *ofdev) } irq_res = platform_get_resource(ofdev, IORESOURCE_IRQ, 0); - if (irq_res) - irq_res->flags = 0; prop = of_get_property(dn, "reg-shift", NULL); if (prop) diff --git a/drivers/ata/pata_platform.c b/drivers/ata/pata_platform.c index a5579b55e332..f8cff3e247c5 100644 --- a/drivers/ata/pata_platform.c +++ b/drivers/ata/pata_platform.c @@ -118,7 +118,7 @@ int __pata_platform_probe(struct device *dev, struct resource *io_res, */ if (irq_res && irq_res->start > 0) { irq = irq_res->start; - irq_flags = irq_res->flags; + irq_flags = irq_res->flags & IRQF_TRIGGER_MASK; } /* @@ -213,8 +213,6 @@ static int pata_platform_probe(struct platform_device *pdev) * And the IRQ */ irq_res = platform_get_resource(pdev, IORESOURCE_IRQ, 0); - if (irq_res) - irq_res->flags = pp_info ? pp_info->irq_flags : 0; return __pata_platform_probe(&pdev->dev, io_res, ctl_res, irq_res, pp_info ? pp_info->ioport_shift : 0, diff --git a/include/linux/ata_platform.h b/include/linux/ata_platform.h index b9fde17f767c..5c618a084225 100644 --- a/include/linux/ata_platform.h +++ b/include/linux/ata_platform.h @@ -8,11 +8,6 @@ struct pata_platform_info { * spacing used by ata_std_ports(). */ unsigned int ioport_shift; - /* - * Indicate platform specific irq types and initial - * IRQ flags when call request_irq() - */ - unsigned int irq_flags; }; extern int __pata_platform_probe(struct device *dev, -- cgit v1.2.3 From 3af20efc0f83cdc65ce56ec108c0e81f602364df Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 22 Aug 2014 18:55:39 -0700 Subject: net: phy: broadcom: extract all registers to brcmphy.h Commit 439d39a9ac8fbbba9c04581361188f33f21ced50 ("net: phy: broadcom: extract register definitions") added a bunch of registers to brcmphy.h but left some to broadcom.c, move all of them to the header file since the BCM54xx and BCM7xxx PHY drivers do share all of these registers. Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/phy/broadcom.c | 104 --------------------------------------------- include/linux/brcmphy.h | 103 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+), 104 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 34088d60da74..6001d76d8676 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -25,110 +25,6 @@ #define BRCM_PHY_REV(phydev) \ ((phydev)->drv->phy_id & ~((phydev)->drv->phy_id_mask)) -/* - * Broadcom LED source encodings. These are used in BCM5461, BCM5481, - * BCM5482, and possibly some others. - */ -#define BCM_LED_SRC_LINKSPD1 0x0 -#define BCM_LED_SRC_LINKSPD2 0x1 -#define BCM_LED_SRC_XMITLED 0x2 -#define BCM_LED_SRC_ACTIVITYLED 0x3 -#define BCM_LED_SRC_FDXLED 0x4 -#define BCM_LED_SRC_SLAVE 0x5 -#define BCM_LED_SRC_INTR 0x6 -#define BCM_LED_SRC_QUALITY 0x7 -#define BCM_LED_SRC_RCVLED 0x8 -#define BCM_LED_SRC_MULTICOLOR1 0xa -#define BCM_LED_SRC_OPENSHORT 0xb -#define BCM_LED_SRC_OFF 0xe /* Tied high */ -#define BCM_LED_SRC_ON 0xf /* Tied low */ - - -/* - * BCM5482: Shadow registers - * Shadow values go into bits [14:10] of register 0x1c to select a shadow - * register to access. - */ -/* 00101: Spare Control Register 3 */ -#define BCM54XX_SHD_SCR3 0x05 -#define BCM54XX_SHD_SCR3_DEF_CLK125 0x0001 -#define BCM54XX_SHD_SCR3_DLLAPD_DIS 0x0002 -#define BCM54XX_SHD_SCR3_TRDDAPD 0x0004 - -/* 01010: Auto Power-Down */ -#define BCM54XX_SHD_APD 0x0a -#define BCM54XX_SHD_APD_EN 0x0020 - -#define BCM5482_SHD_LEDS1 0x0d /* 01101: LED Selector 1 */ - /* LED3 / ~LINKSPD[2] selector */ -#define BCM5482_SHD_LEDS1_LED3(src) ((src & 0xf) << 4) - /* LED1 / ~LINKSPD[1] selector */ -#define BCM5482_SHD_LEDS1_LED1(src) ((src & 0xf) << 0) -#define BCM54XX_SHD_RGMII_MODE 0x0b /* 01011: RGMII Mode Selector */ -#define BCM5482_SHD_SSD 0x14 /* 10100: Secondary SerDes control */ -#define BCM5482_SHD_SSD_LEDM 0x0008 /* SSD LED Mode enable */ -#define BCM5482_SHD_SSD_EN 0x0001 /* SSD enable */ -#define BCM5482_SHD_MODE 0x1f /* 11111: Mode Control Register */ -#define BCM5482_SHD_MODE_1000BX 0x0001 /* Enable 1000BASE-X registers */ - - -/* - * EXPANSION SHADOW ACCESS REGISTERS. (PHY REG 0x15, 0x16, and 0x17) - */ -#define MII_BCM54XX_EXP_AADJ1CH0 0x001f -#define MII_BCM54XX_EXP_AADJ1CH0_SWP_ABCD_OEN 0x0200 -#define MII_BCM54XX_EXP_AADJ1CH0_SWSEL_THPF 0x0100 -#define MII_BCM54XX_EXP_AADJ1CH3 0x601f -#define MII_BCM54XX_EXP_AADJ1CH3_ADCCKADJ 0x0002 -#define MII_BCM54XX_EXP_EXP08 0x0F08 -#define MII_BCM54XX_EXP_EXP08_RJCT_2MHZ 0x0001 -#define MII_BCM54XX_EXP_EXP08_EARLY_DAC_WAKE 0x0200 -#define MII_BCM54XX_EXP_EXP75 0x0f75 -#define MII_BCM54XX_EXP_EXP75_VDACCTRL 0x003c -#define MII_BCM54XX_EXP_EXP75_CM_OSC 0x0001 -#define MII_BCM54XX_EXP_EXP96 0x0f96 -#define MII_BCM54XX_EXP_EXP96_MYST 0x0010 -#define MII_BCM54XX_EXP_EXP97 0x0f97 -#define MII_BCM54XX_EXP_EXP97_MYST 0x0c0c - -/* - * BCM5482: Secondary SerDes registers - */ -#define BCM5482_SSD_1000BX_CTL 0x00 /* 1000BASE-X Control */ -#define BCM5482_SSD_1000BX_CTL_PWRDOWN 0x0800 /* Power-down SSD */ -#define BCM5482_SSD_SGMII_SLAVE 0x15 /* SGMII Slave Register */ -#define BCM5482_SSD_SGMII_SLAVE_EN 0x0002 /* Slave mode enable */ -#define BCM5482_SSD_SGMII_SLAVE_AD 0x0001 /* Slave auto-detection */ - - -/*****************************************************************************/ -/* Fast Ethernet Transceiver definitions. 
*/ -/*****************************************************************************/ - -#define MII_BRCM_FET_INTREG 0x1a /* Interrupt register */ -#define MII_BRCM_FET_IR_MASK 0x0100 /* Mask all interrupts */ -#define MII_BRCM_FET_IR_LINK_EN 0x0200 /* Link status change enable */ -#define MII_BRCM_FET_IR_SPEED_EN 0x0400 /* Link speed change enable */ -#define MII_BRCM_FET_IR_DUPLEX_EN 0x0800 /* Duplex mode change enable */ -#define MII_BRCM_FET_IR_ENABLE 0x4000 /* Interrupt enable */ - -#define MII_BRCM_FET_BRCMTEST 0x1f /* Brcm test register */ -#define MII_BRCM_FET_BT_SRE 0x0080 /* Shadow register enable */ - - -/*** Shadow register definitions ***/ - -#define MII_BRCM_FET_SHDW_MISCCTRL 0x10 /* Shadow misc ctrl */ -#define MII_BRCM_FET_SHDW_MC_FAME 0x4000 /* Force Auto MDIX enable */ - -#define MII_BRCM_FET_SHDW_AUXMODE4 0x1a /* Auxiliary mode 4 */ -#define MII_BRCM_FET_SHDW_AM4_LED_MASK 0x0003 -#define MII_BRCM_FET_SHDW_AM4_LED_MODE1 0x0001 - -#define MII_BRCM_FET_SHDW_AUXSTAT2 0x1b /* Auxiliary status 2 */ -#define MII_BRCM_FET_SHDW_AS2_APDE 0x0020 /* Auto power down enable */ - - MODULE_DESCRIPTION("Broadcom PHY driver"); MODULE_AUTHOR("Maciej W. Rozycki"); MODULE_LICENSE("GPL"); diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 61219b9b3445..be31bf9f60c2 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -92,4 +92,107 @@ #define MII_BCM54XX_AUXCTL_SHDWSEL_AUXCTL 0x0000 +/* + * Broadcom LED source encodings. These are used in BCM5461, BCM5481, + * BCM5482, and possibly some others. + */ +#define BCM_LED_SRC_LINKSPD1 0x0 +#define BCM_LED_SRC_LINKSPD2 0x1 +#define BCM_LED_SRC_XMITLED 0x2 +#define BCM_LED_SRC_ACTIVITYLED 0x3 +#define BCM_LED_SRC_FDXLED 0x4 +#define BCM_LED_SRC_SLAVE 0x5 +#define BCM_LED_SRC_INTR 0x6 +#define BCM_LED_SRC_QUALITY 0x7 +#define BCM_LED_SRC_RCVLED 0x8 +#define BCM_LED_SRC_MULTICOLOR1 0xa +#define BCM_LED_SRC_OPENSHORT 0xb +#define BCM_LED_SRC_OFF 0xe /* Tied high */ +#define BCM_LED_SRC_ON 0xf /* Tied low */ + + +/* + * BCM5482: Shadow registers + * Shadow values go into bits [14:10] of register 0x1c to select a shadow + * register to access. + */ +/* 00101: Spare Control Register 3 */ +#define BCM54XX_SHD_SCR3 0x05 +#define BCM54XX_SHD_SCR3_DEF_CLK125 0x0001 +#define BCM54XX_SHD_SCR3_DLLAPD_DIS 0x0002 +#define BCM54XX_SHD_SCR3_TRDDAPD 0x0004 + +/* 01010: Auto Power-Down */ +#define BCM54XX_SHD_APD 0x0a +#define BCM54XX_SHD_APD_EN 0x0020 + +#define BCM5482_SHD_LEDS1 0x0d /* 01101: LED Selector 1 */ + /* LED3 / ~LINKSPD[2] selector */ +#define BCM5482_SHD_LEDS1_LED3(src) ((src & 0xf) << 4) + /* LED1 / ~LINKSPD[1] selector */ +#define BCM5482_SHD_LEDS1_LED1(src) ((src & 0xf) << 0) +#define BCM54XX_SHD_RGMII_MODE 0x0b /* 01011: RGMII Mode Selector */ +#define BCM5482_SHD_SSD 0x14 /* 10100: Secondary SerDes control */ +#define BCM5482_SHD_SSD_LEDM 0x0008 /* SSD LED Mode enable */ +#define BCM5482_SHD_SSD_EN 0x0001 /* SSD enable */ +#define BCM5482_SHD_MODE 0x1f /* 11111: Mode Control Register */ +#define BCM5482_SHD_MODE_1000BX 0x0001 /* Enable 1000BASE-X registers */ + + +/* + * EXPANSION SHADOW ACCESS REGISTERS. 
(PHY REG 0x15, 0x16, and 0x17) + */ +#define MII_BCM54XX_EXP_AADJ1CH0 0x001f +#define MII_BCM54XX_EXP_AADJ1CH0_SWP_ABCD_OEN 0x0200 +#define MII_BCM54XX_EXP_AADJ1CH0_SWSEL_THPF 0x0100 +#define MII_BCM54XX_EXP_AADJ1CH3 0x601f +#define MII_BCM54XX_EXP_AADJ1CH3_ADCCKADJ 0x0002 +#define MII_BCM54XX_EXP_EXP08 0x0F08 +#define MII_BCM54XX_EXP_EXP08_RJCT_2MHZ 0x0001 +#define MII_BCM54XX_EXP_EXP08_EARLY_DAC_WAKE 0x0200 +#define MII_BCM54XX_EXP_EXP75 0x0f75 +#define MII_BCM54XX_EXP_EXP75_VDACCTRL 0x003c +#define MII_BCM54XX_EXP_EXP75_CM_OSC 0x0001 +#define MII_BCM54XX_EXP_EXP96 0x0f96 +#define MII_BCM54XX_EXP_EXP96_MYST 0x0010 +#define MII_BCM54XX_EXP_EXP97 0x0f97 +#define MII_BCM54XX_EXP_EXP97_MYST 0x0c0c + +/* + * BCM5482: Secondary SerDes registers + */ +#define BCM5482_SSD_1000BX_CTL 0x00 /* 1000BASE-X Control */ +#define BCM5482_SSD_1000BX_CTL_PWRDOWN 0x0800 /* Power-down SSD */ +#define BCM5482_SSD_SGMII_SLAVE 0x15 /* SGMII Slave Register */ +#define BCM5482_SSD_SGMII_SLAVE_EN 0x0002 /* Slave mode enable */ +#define BCM5482_SSD_SGMII_SLAVE_AD 0x0001 /* Slave auto-detection */ + + +/*****************************************************************************/ +/* Fast Ethernet Transceiver definitions. */ +/*****************************************************************************/ + +#define MII_BRCM_FET_INTREG 0x1a /* Interrupt register */ +#define MII_BRCM_FET_IR_MASK 0x0100 /* Mask all interrupts */ +#define MII_BRCM_FET_IR_LINK_EN 0x0200 /* Link status change enable */ +#define MII_BRCM_FET_IR_SPEED_EN 0x0400 /* Link speed change enable */ +#define MII_BRCM_FET_IR_DUPLEX_EN 0x0800 /* Duplex mode change enable */ +#define MII_BRCM_FET_IR_ENABLE 0x4000 /* Interrupt enable */ + +#define MII_BRCM_FET_BRCMTEST 0x1f /* Brcm test register */ +#define MII_BRCM_FET_BT_SRE 0x0080 /* Shadow register enable */ + + +/*** Shadow register definitions ***/ + +#define MII_BRCM_FET_SHDW_MISCCTRL 0x10 /* Shadow misc ctrl */ +#define MII_BRCM_FET_SHDW_MC_FAME 0x4000 /* Force Auto MDIX enable */ + +#define MII_BRCM_FET_SHDW_AUXMODE4 0x1a /* Auxiliary mode 4 */ +#define MII_BRCM_FET_SHDW_AM4_LED_MASK 0x0003 +#define MII_BRCM_FET_SHDW_AM4_LED_MODE1 0x0001 + +#define MII_BRCM_FET_SHDW_AUXSTAT2 0x1b /* Auxiliary status 2 */ +#define MII_BRCM_FET_SHDW_AS2_APDE 0x0020 /* Auto power down enable */ + #endif /* _LINUX_BRCMPHY_H */ -- cgit v1.2.3 From 705314797b8b997554b7e9d0ea7b65a497356e53 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 22 Aug 2014 18:55:40 -0700 Subject: net: phy: broadcom: move shadow 0x1C register accessors to brcmphy.h The shadow register 0x1C is used both by the BCM54xxx PHYs and the BCM7xxx internal PHYs, move the accessors to a common location so both drivers can use them. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/broadcom.c | 18 ------------------ include/linux/brcmphy.h | 20 ++++++++++++++++++++ 2 files changed, 20 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c index 6001d76d8676..854f2c9a7b2b 100644 --- a/drivers/net/phy/broadcom.c +++ b/drivers/net/phy/broadcom.c @@ -29,24 +29,6 @@ MODULE_DESCRIPTION("Broadcom PHY driver"); MODULE_AUTHOR("Maciej W. Rozycki"); MODULE_LICENSE("GPL"); -/* - * Indirect register access functions for the 1000BASE-T/100BASE-TX/10BASE-T - * 0x1c shadow registers. 
- */ -static int bcm54xx_shadow_read(struct phy_device *phydev, u16 shadow) -{ - phy_write(phydev, MII_BCM54XX_SHD, MII_BCM54XX_SHD_VAL(shadow)); - return MII_BCM54XX_SHD_DATA(phy_read(phydev, MII_BCM54XX_SHD)); -} - -static int bcm54xx_shadow_write(struct phy_device *phydev, u16 shadow, u16 val) -{ - return phy_write(phydev, MII_BCM54XX_SHD, - MII_BCM54XX_SHD_WRITE | - MII_BCM54XX_SHD_VAL(shadow) | - MII_BCM54XX_SHD_DATA(val)); -} - /* Indirect register access functions for the Expansion Registers */ static int bcm54xx_exp_read(struct phy_device *phydev, u16 regnum) { diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index be31bf9f60c2..722cf26567fa 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -195,4 +195,24 @@ #define MII_BRCM_FET_SHDW_AUXSTAT2 0x1b /* Auxiliary status 2 */ #define MII_BRCM_FET_SHDW_AS2_APDE 0x0020 /* Auto power down enable */ +/* + * Indirect register access functions for the 1000BASE-T/100BASE-TX/10BASE-T + * 0x1c shadow registers. + */ +static inline int bcm54xx_shadow_read(struct phy_device *phydev, u16 shadow) +{ + phy_write(phydev, MII_BCM54XX_SHD, MII_BCM54XX_SHD_VAL(shadow)); + return MII_BCM54XX_SHD_DATA(phy_read(phydev, MII_BCM54XX_SHD)); +} + +static inline int bcm54xx_shadow_write(struct phy_device *phydev, u16 shadow, + u16 val) +{ + return phy_write(phydev, MII_BCM54XX_SHD, + MII_BCM54XX_SHD_WRITE | + MII_BCM54XX_SHD_VAL(shadow) | + MII_BCM54XX_SHD_DATA(val)); +} + + #endif /* _LINUX_BRCMPHY_H */ -- cgit v1.2.3 From 66ce7fb9807b036058aa380bfd2b3851ae25ce39 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 22 Aug 2014 18:55:43 -0700 Subject: net: phy: export phy_{read,write}_mmd_indirect Some PHY drivers might need to access Clause 45 registers in Clause 22 compatibility mode to, e.g., properly advertise EEE support when it is disabled by default. Export these two helper functions, phy_read_mmd_indirect() and phy_write_mmd_indirect(), for drivers to use. Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- drivers/net/phy/phy.c | 6 ++++-- include/linux/phy.h | 27 +++++++++++++++++++++++++++ 2 files changed, 31 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index c94e2a27446a..e7a5893f32ff 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -955,7 +955,7 @@ static inline void mmd_phy_indirect(struct mii_bus *bus, int prtad, int devad, * 3) Write reg 13 // MMD Data Command for MMD DEVAD * 3) Read reg 14 // Read MMD data */ -static int phy_read_mmd_indirect(struct phy_device *phydev, int prtad, +int phy_read_mmd_indirect(struct phy_device *phydev, int prtad, int devad, int addr) { struct phy_driver *phydrv = phydev->drv; @@ -971,6 +971,7 @@ static int phy_read_mmd_indirect(struct phy_device *phydev, int prtad, } return value; } +EXPORT_SYMBOL(phy_read_mmd_indirect); /** * phy_write_mmd_indirect - writes data to the MMD registers @@ -988,7 +989,7 @@ static int phy_read_mmd_indirect(struct phy_device *phydev, int prtad, * 3) Write reg 13 // MMD Data Command for MMD DEVAD * 3) Write reg 14 // Write MMD data */ -static void phy_write_mmd_indirect(struct phy_device *phydev, int prtad, +void phy_write_mmd_indirect(struct phy_device *phydev, int prtad, int devad, int addr, u32 data) { struct phy_driver *phydrv = phydev->drv; @@ -1002,6 +1003,7 @@ static void phy_write_mmd_indirect(struct phy_device *phydev, int prtad, phydrv->write_mmd_indirect(phydev, prtad, devad, addr, data); } } +EXPORT_SYMBOL(phy_write_mmd_indirect); /** * phy_init_eee - init and check the EEE feature diff --git a/include/linux/phy.h b/include/linux/phy.h index ed39956b5613..d090cfcaa167 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -597,6 +597,19 @@ static inline int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum) MII_ADDR_C45 | (devad << 16) | (regnum & 0xffff)); } +/** + * phy_read_mmd_indirect - reads data from the MMD registers + * @phydev: The PHY device bus + * @prtad: MMD Address + * @devad: MMD DEVAD + * @addr: PHY address on the MII bus + * + * Description: it reads data from the MMD registers (clause 22 to access to + * clause 45) of the specified phy address. + */ +int phy_read_mmd_indirect(struct phy_device *phydev, int prtad, + int devad, int addr); + /** * phy_read - Convenience function for reading a given PHY register * @phydev: the phy_device struct @@ -668,6 +681,20 @@ static inline int phy_write_mmd(struct phy_device *phydev, int devad, return mdiobus_write(phydev->bus, phydev->addr, regnum, val); } +/** + * phy_write_mmd_indirect - writes data to the MMD registers + * @phydev: The PHY device + * @prtad: MMD Address + * @devad: MMD DEVAD + * @addr: PHY address on the MII bus + * @data: data to write in the MMD register + * + * Description: Write data from the MMD registers of the specified + * phy address. + */ +void phy_write_mmd_indirect(struct phy_device *phydev, int prtad, + int devad, int addr, u32 data); + struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, bool is_c45, struct phy_c45_device_ids *c45_ids); -- cgit v1.2.3 From b8f9a02924bbeb0c46ca4c19561cbe765b80e264 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 22 Aug 2014 18:55:45 -0700 Subject: net: phy: bcm7xxx: enable EEE at the PHY level The 28nm Gigabit PHY on BCM7xxx chips comes out of reset with absolutely no EEE capabilities, such that we would actually return that we do not support EEE when accessing 3.20 (MDIO_PCS_EEE_ABLE) registers. 
Poke through the vendor-specific C45 register to enable EEE globally at the PHY level, and advertise supported EEE modes. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/bcm7xxx.c | 31 +++++++++++++++++++++++++++++++ include/linux/brcmphy.h | 3 +++ 2 files changed, 34 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index 29e256a4ed57..e98c510b75c5 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -14,6 +14,7 @@ #include #include #include +#include /* Broadcom BCM7xxx internal PHY registers */ #define MII_BCM7XXX_CHANNEL_WIDTH 0x2000 @@ -167,6 +168,32 @@ static int bcm7xxx_apd_enable(struct phy_device *phydev) return bcm54xx_shadow_write(phydev, BCM54XX_SHD_APD, val); } +static int bcm7xxx_eee_enable(struct phy_device *phydev) +{ + int val; + + val = phy_read_mmd_indirect(phydev, BRCM_CL45VEN_EEE_CONTROL, + MDIO_MMD_AN, phydev->addr); + if (val < 0) + return val; + + /* Enable general EEE feature at the PHY level */ + val |= LPI_FEATURE_EN | LPI_FEATURE_EN_DIG1000X; + + phy_write_mmd_indirect(phydev, BRCM_CL45VEN_EEE_CONTROL, + MDIO_MMD_AN, phydev->addr, val); + + /* Advertise supported modes */ + val = phy_read_mmd_indirect(phydev, MDIO_AN_EEE_ADV, + MDIO_MMD_AN, phydev->addr); + + val |= (MDIO_AN_EEE_ADV_100TX | MDIO_AN_EEE_ADV_1000T); + phy_write_mmd_indirect(phydev, MDIO_AN_EEE_ADV, + MDIO_MMD_AN, phydev->addr, val); + + return 0; +} + static int bcm7xxx_28nm_config_init(struct phy_device *phydev) { int ret; @@ -179,6 +206,10 @@ static int bcm7xxx_28nm_config_init(struct phy_device *phydev) if (ret) return ret; + ret = bcm7xxx_eee_enable(phydev); + if (ret) + return ret; + return bcm7xxx_apd_enable(phydev); } diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 722cf26567fa..ee1431d976fa 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -214,5 +214,8 @@ static inline int bcm54xx_shadow_write(struct phy_device *phydev, u16 shadow, MII_BCM54XX_SHD_DATA(val)); } +#define BRCM_CL45VEN_EEE_CONTROL 0x803d +#define LPI_FEATURE_EN 0x8000 +#define LPI_FEATURE_EN_DIG1000X 0x4000 #endif /* _LINUX_BRCMPHY_H */ -- cgit v1.2.3 From 690e36e726d00d2528bc569809048adf61550d80 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 23 Aug 2014 12:13:41 -0700 Subject: net: Allow raw buffers to be passed into the flow dissector. Drivers, and perhaps other entities we have not yet considered, sometimes want to know how deep the protocol headers go before deciding how large of an SKB to allocate and how much of the packet to place into the linear SKB area. For example, consider a driver which has a device which DMAs into pools of pages and then tells the driver where the data went in the DMA descriptor(s). The driver can then build an SKB and reference most of the data via SKB fragments (which are page/offset/length triplets). However at least some of the front of the packet should be placed into the linear SKB area, which comes before the fragments, so that packet processing can get at the headers efficiently. The first thing each protocol layer is going to do is a "pskb_may_pull()" so we might as well aggregate as much of this as possible while we're building the SKB in the driver. Part of supporting this is that we don't have an SKB yet, so we want to be able to let the flow dissector operate on a raw buffer in order to compute the offset of the end of the headers. 
So now we have a __skb_flow_dissect() which takes an explicit data pointer and length. Signed-off-by: David S. Miller --- include/linux/skbuff.h | 18 ++++++++++++------ include/net/flow_keys.h | 14 ++++++++++++-- net/core/flow_dissector.c | 40 ++++++++++++++++++++++++++-------------- 3 files changed, 50 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index abde271c18ae..18ddf9684a27 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2567,20 +2567,26 @@ __wsum __skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum skb_checksum(const struct sk_buff *skb, int offset, int len, __wsum csum); -static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, - int len, void *buffer) +static inline void *__skb_header_pointer(const struct sk_buff *skb, int offset, + int len, void *data, int hlen, void *buffer) { - int hlen = skb_headlen(skb); - if (hlen - offset >= len) - return skb->data + offset; + return data + offset; - if (skb_copy_bits(skb, offset, buffer, len) < 0) + if (!skb || + skb_copy_bits(skb, offset, buffer, len) < 0) return NULL; return buffer; } +static inline void *skb_header_pointer(const struct sk_buff *skb, int offset, + int len, void *buffer) +{ + return __skb_header_pointer(skb, offset, len, skb->data, + skb_headlen(skb), buffer); +} + /** * skb_needs_linearize - check if we need to linearize a given skb * depending on the given device features. diff --git a/include/net/flow_keys.h b/include/net/flow_keys.h index 6667a054763a..4040f63932c5 100644 --- a/include/net/flow_keys.h +++ b/include/net/flow_keys.h @@ -27,7 +27,17 @@ struct flow_keys { u8 ip_proto; }; -bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow); -__be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto); +bool __skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, + void *data, int hlen); +static inline bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow) +{ + return __skb_flow_dissect(skb, flow, NULL, 0); +} +__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, + void *data, int hlen_proto); +static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto) +{ + return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0); +} u32 flow_hash_from_keys(struct flow_keys *keys); #endif diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 5f362c1d0332..660c6492fb78 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -34,29 +34,40 @@ static void iph_to_flow_copy_addrs(struct flow_keys *flow, const struct iphdr *i * The function will try to retrieve the ports at offset thoff + poff where poff * is the protocol port offset returned from proto_ports_offset */ -__be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto) +__be32 __skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 ip_proto, + void *data, int hlen) { int poff = proto_ports_offset(ip_proto); + if (!data) { + data = skb->data; + hlen = skb_headlen(skb); + } + if (poff >= 0) { __be32 *ports, _ports; - ports = skb_header_pointer(skb, thoff + poff, - sizeof(_ports), &_ports); + ports = __skb_header_pointer(skb, thoff + poff, + sizeof(_ports), data, hlen, &_ports); if (ports) return *ports; } return 0; } -EXPORT_SYMBOL(skb_flow_get_ports); +EXPORT_SYMBOL(__skb_flow_get_ports); -bool skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow) +bool 
__skb_flow_dissect(const struct sk_buff *skb, struct flow_keys *flow, void *data, int hlen) { int nhoff = skb_network_offset(skb); u8 ip_proto; __be16 proto = skb->protocol; + if (!data) { + data = skb->data; + hlen = skb_headlen(skb); + } + memset(flow, 0, sizeof(*flow)); again: @@ -65,7 +76,7 @@ again: const struct iphdr *iph; struct iphdr _iph; ip: - iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); + iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph); if (!iph || iph->ihl < 5) return false; nhoff += iph->ihl * 4; @@ -83,7 +94,7 @@ ip: __be32 flow_label; ipv6: - iph = skb_header_pointer(skb, nhoff, sizeof(_iph), &_iph); + iph = __skb_header_pointer(skb, nhoff, sizeof(_iph), data, hlen, &_iph); if (!iph) return false; @@ -113,7 +124,7 @@ ipv6: const struct vlan_hdr *vlan; struct vlan_hdr _vlan; - vlan = skb_header_pointer(skb, nhoff, sizeof(_vlan), &_vlan); + vlan = __skb_header_pointer(skb, nhoff, sizeof(_vlan), data, hlen, &_vlan); if (!vlan) return false; @@ -126,7 +137,7 @@ ipv6: struct pppoe_hdr hdr; __be16 proto; } *hdr, _hdr; - hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr); + hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) return false; proto = hdr->proto; @@ -151,7 +162,7 @@ ipv6: __be16 proto; } *hdr, _hdr; - hdr = skb_header_pointer(skb, nhoff, sizeof(_hdr), &_hdr); + hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data, hlen, &_hdr); if (!hdr) return false; /* @@ -171,8 +182,9 @@ ipv6: const struct ethhdr *eth; struct ethhdr _eth; - eth = skb_header_pointer(skb, nhoff, - sizeof(_eth), &_eth); + eth = __skb_header_pointer(skb, nhoff, + sizeof(_eth), + data, hlen, &_eth); if (!eth) return false; proto = eth->h_proto; @@ -194,12 +206,12 @@ ipv6: flow->n_proto = proto; flow->ip_proto = ip_proto; - flow->ports = skb_flow_get_ports(skb, nhoff, ip_proto); + flow->ports = __skb_flow_get_ports(skb, nhoff, ip_proto, data, hlen); flow->thoff = (u16) nhoff; return true; } -EXPORT_SYMBOL(skb_flow_dissect); +EXPORT_SYMBOL(__skb_flow_dissect); static u32 hashrnd __read_mostly; static __always_inline void __flow_hash_secret_init(void) -- cgit v1.2.3 From 179033b3e064d2cd3f5f9945e76b0a0f0fbf4883 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 7 Aug 2014 11:48:26 -0400 Subject: perf: Add PERF_EVENT_STATE_EXIT state for events with exited task Adding a new perf event state to indicate that the monitored task has exited. In this case the event stays alive until the owner task exits or closes the event fd, while still providing the last data through the read syscall and ring buffer. The event also needs to propagate the error info (the monitored task has died) via the poll and read syscalls, by returning POLLHUP and 0 respectively. 
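To illustrate the new user-visible semantics, a minimal (hypothetical) user-space monitor could look like the sketch below; the fd is assumed to come from an earlier perf_event_open() call, and error handling is trimmed:

#include <poll.h>
#include <stdint.h>
#include <unistd.h>

/* Wait until the monitored task dies, then consume the final read.
 * With this patch, poll() reports POLLHUP and read() returns 0 once
 * the event has entered PERF_EVENT_STATE_EXIT. */
static void wait_for_task_exit(int perf_fd)
{
	struct pollfd pfd = { .fd = perf_fd, .events = POLLIN };
	uint64_t value;

	while (poll(&pfd, 1, -1) >= 0) {
		if (pfd.revents & POLLHUP) {
			ssize_t n = read(perf_fd, &value, sizeof(value));
			/* n == 0 here signals "monitored task has died" */
			(void)n;
			break;
		}
		/* POLLIN: normal event data available, handled elsewhere */
	}
}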
Signed-off-by: Jiri Olsa Acked-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140811120102.GY9918@twins.programming.kicks-ass.net Cc: Adrian Hunter Cc: Arnaldo Carvalho de Melo Cc: Corey Ashford Cc: David Ahern Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Jean Pihet Cc: Namhyung Kim Cc: Paul Mackerras Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-t5y3w8jjx6tfo5w8y6oajsjq@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 1 + kernel/events/core.c | 10 +++++++++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f0a1036b1911..893a0d07986f 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -269,6 +269,7 @@ struct pmu { * enum perf_event_active_state - the states of a event */ enum perf_event_active_state { + PERF_EVENT_STATE_EXIT = -3, PERF_EVENT_STATE_ERROR = -2, PERF_EVENT_STATE_OFF = -1, PERF_EVENT_STATE_INACTIVE = 0, diff --git a/kernel/events/core.c b/kernel/events/core.c index 4575dd6e59ea..d8cb4d21a346 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3600,7 +3600,8 @@ perf_read_hw(struct perf_event *event, char __user *buf, size_t count) * error state (i.e. because it was pinned but it couldn't be * scheduled on to the CPU at some point). */ - if (event->state == PERF_EVENT_STATE_ERROR) + if ((event->state == PERF_EVENT_STATE_ERROR) || + (event->state == PERF_EVENT_STATE_EXIT)) return 0; if (count < event->read_size) @@ -3630,6 +3631,10 @@ static unsigned int perf_poll(struct file *file, poll_table *wait) unsigned int events = POLLHUP; poll_wait(file, &event->waitq, wait); + + if (event->state == PERF_EVENT_STATE_EXIT) + return events; + /* * Pin the event->rb by taking event->mmap_mutex; otherwise * perf_event_set_output() can swizzle our rb and make us miss wakeups. @@ -7588,6 +7593,9 @@ __perf_event_exit_task(struct perf_event *child_event, if (child_event->parent) { sync_child_event(child_event, child); free_event(child_event); + } else { + child_event->state = PERF_EVENT_STATE_EXIT; + perf_event_wakeup(child_event); } } -- cgit v1.2.3 From 1b05756c48ea07ced9604ef01d11194d936da163 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Tue, 5 Aug 2014 22:02:34 +0200 Subject: netfilter: ipset: Fix warn: integer overflows 'sizeof(*map) + size * set->dsize' Dan Carpenter reported that the static checker emits the warning net/netfilter/ipset/ip_set_list_set.c:600 init_list_set() warn: integer overflows 'sizeof(*map) + size * set->dsize' Limit the maximal number of elements in list type of sets. 
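For illustration only, the wrap-around is easy to reproduce in a user-space toy program; the element count and sizes below are made up, the point is that the 32-bit product silently overflows:

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	uint32_t size  = 0x10000000;         /* user-controlled element count */
	uint32_t dsize = 64;                 /* per-element size */
	uint32_t bytes = 16 + size * dsize;  /* 2^28 * 64 = 2^34 wraps to 0 */

	/* Prints "16 bytes" even though ~16 GiB were requested, so the
	 * subsequent allocation would be far too small. Clamping the
	 * element count, as min_t(u32, size, IP_SET_LIST_MAX_SIZE) does
	 * above, keeps the product from ever wrapping. */
	printf("%u elements -> %u bytes\n", size, bytes);
	return 0;
}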
Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set_list.h | 1 + net/netfilter/ipset/ip_set_list_set.c | 4 +++- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set_list.h b/include/linux/netfilter/ipset/ip_set_list.h index 68c2aea897f5..fe2622a00151 100644 --- a/include/linux/netfilter/ipset/ip_set_list.h +++ b/include/linux/netfilter/ipset/ip_set_list.h @@ -6,5 +6,6 @@ #define IP_SET_LIST_DEFAULT_SIZE 8 #define IP_SET_LIST_MIN_SIZE 4 +#define IP_SET_LIST_MAX_SIZE 65536 #endif /* __IP_SET_LIST_H */ diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c index 3e2317f3cf68..f87adbad6076 100644 --- a/net/netfilter/ipset/ip_set_list_set.c +++ b/net/netfilter/ipset/ip_set_list_set.c @@ -597,7 +597,9 @@ init_list_set(struct net *net, struct ip_set *set, u32 size) struct set_elem *e; u32 i; - map = kzalloc(sizeof(*map) + size * set->dsize, GFP_KERNEL); + map = kzalloc(sizeof(*map) + + min_t(u32, size, IP_SET_LIST_MAX_SIZE) * set->dsize, + GFP_KERNEL); if (!map) return false; -- cgit v1.2.3 From 573e8fca255a27e3573b51f9b183d62641c47a3d Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Fri, 22 Aug 2014 13:33:47 -0700 Subject: net: skb_gro_checksum_* functions Add skb_gro_checksum_validate, skb_gro_checksum_validate_zero_check, and skb_gro_checksum_simple_validate, and __skb_gro_checksum_complete. These are the cognates of the normal checksum functions but are used in the gro_receive path and operate on GRO related fields in sk_buffs. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 76 +++++++++++++++++++++++++++++++++++++++++++++-- net/core/dev.c | 34 ++++++++++++++++++++- 2 files changed, 107 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7e2b0b8b5cd7..eb73444e1bd0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1883,7 +1883,13 @@ struct napi_gro_cb { u16 proto; /* Used in udp_gro_receive */ - u16 udp_mark; + u8 udp_mark:1; + + /* GRO checksum is valid */ + u8 csum_valid:1; + + /* Number encapsulation layers crossed */ + u8 encapsulation; /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; @@ -2154,11 +2160,77 @@ static inline void *skb_gro_network_header(struct sk_buff *skb) static inline void skb_gro_postpull_rcsum(struct sk_buff *skb, const void *start, unsigned int len) { - if (skb->ip_summed == CHECKSUM_COMPLETE) + if (NAPI_GRO_CB(skb)->csum_valid) NAPI_GRO_CB(skb)->csum = csum_sub(NAPI_GRO_CB(skb)->csum, csum_partial(start, len, 0)); } +/* GRO checksum functions. These are logical equivalents of the normal + * checksum functions (in skbuff.h) except that they operate on the GRO + * offsets and fields in sk_buff. 
+ */ + +__sum16 __skb_gro_checksum_complete(struct sk_buff *skb); + +static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb, + bool zero_okay, + __sum16 check) +{ + return (skb->ip_summed != CHECKSUM_PARTIAL && + (skb->ip_summed != CHECKSUM_UNNECESSARY || + (NAPI_GRO_CB(skb)->encapsulation > skb->encapsulation)) && + (!zero_okay || check)); +} + +static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb, + __wsum psum) +{ + if (NAPI_GRO_CB(skb)->csum_valid && + !csum_fold(csum_add(psum, NAPI_GRO_CB(skb)->csum))) + return 0; + + NAPI_GRO_CB(skb)->csum = psum; + + return __skb_gro_checksum_complete(skb); +} + +/* Update skb for CHECKSUM_UNNECESSARY when we verified a top level + * checksum or an encapsulated one during GRO. This saves work + * if we fallback to normal path with the packet. + */ +static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb) +{ + if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + if (NAPI_GRO_CB(skb)->encapsulation) + skb->encapsulation = 1; + } else if (skb->ip_summed != CHECKSUM_PARTIAL) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->encapsulation = 0; + } +} + +#define __skb_gro_checksum_validate(skb, proto, zero_okay, check, \ + compute_pseudo) \ +({ \ + __sum16 __ret = 0; \ + if (__skb_gro_checksum_validate_needed(skb, zero_okay, check)) \ + __ret = __skb_gro_checksum_validate_complete(skb, \ + compute_pseudo(skb, proto)); \ + if (!__ret) \ + skb_gro_incr_csum_unnecessary(skb); \ + __ret; \ +}) + +#define skb_gro_checksum_validate(skb, proto, compute_pseudo) \ + __skb_gro_checksum_validate(skb, proto, false, 0, compute_pseudo) + +#define skb_gro_checksum_validate_zero_check(skb, proto, check, \ + compute_pseudo) \ + __skb_gro_checksum_validate(skb, proto, true, check, compute_pseudo) + +#define skb_gro_checksum_simple_validate(skb) \ + __skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo) + static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, diff --git a/net/core/dev.c b/net/core/dev.c index 1421dad4cb29..b6a718ec11c1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3962,7 +3962,13 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff goto normal; gro_list_prepare(napi, skb); - NAPI_GRO_CB(skb)->csum = skb->csum; /* Needed for CHECKSUM_COMPLETE */ + + if (skb->ip_summed == CHECKSUM_COMPLETE) { + NAPI_GRO_CB(skb)->csum = skb->csum; + NAPI_GRO_CB(skb)->csum_valid = 1; + } else { + NAPI_GRO_CB(skb)->csum_valid = 0; + } rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { @@ -3975,6 +3981,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; NAPI_GRO_CB(skb)->udp_mark = 0; + NAPI_GRO_CB(skb)->encapsulation = 0; pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); break; @@ -4205,6 +4212,31 @@ gro_result_t napi_gro_frags(struct napi_struct *napi) } EXPORT_SYMBOL(napi_gro_frags); +/* Compute the checksum from gro_offset and return the folded value + * after adding in any pseudo checksum. 
+ */ +__sum16 __skb_gro_checksum_complete(struct sk_buff *skb) +{ + __wsum wsum; + __sum16 sum; + + wsum = skb_checksum(skb, skb_gro_offset(skb), skb_gro_len(skb), 0); + + /* NAPI_GRO_CB(skb)->csum holds pseudo checksum */ + sum = csum_fold(csum_add(NAPI_GRO_CB(skb)->csum, wsum)); + if (likely(!sum)) { + if (unlikely(skb->ip_summed == CHECKSUM_COMPLETE) && + !skb->csum_complete_sw) + netdev_rx_csum_fault(skb->dev); + } + + NAPI_GRO_CB(skb)->csum = wsum; + NAPI_GRO_CB(skb)->csum_valid = 1; + + return sum; +} +EXPORT_SYMBOL(__skb_gro_checksum_complete); + /* * net_rps_action_and_irq_enable sends any pending IPI's for rps. * Note: called with local irq disabled, but exits with local irq enabled. -- cgit v1.2.3 From a98406e22c12e514bac28fec0a49dc793edaf3a8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 23 Aug 2014 17:03:28 +0200 Subject: random32: improvements to prandom_bytes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch addresses a couple of minor items, mostly addressing prandom_bytes(): 1) prandom_bytes{,_state}() should use size_t for length arguments, 2) We can use put_unaligned() when filling the array instead of open coding it [ perhaps some archs will further benefit from their own arch specific implementation when GCC cannot make up for it ], 3) Fix a typo, 4) Better use unsigned int as type for getting the arch seed, 5) Make use of prandom_u32_max() for timer slack. Regarding the change to put_unaligned(), callers of prandom_bytes(), which internally invokes prandom_bytes_state(), aren't affected, as they expect the array to be filled randomly and have no control over the internal state whatsoever (that's also why we have periodic reseeding there, etc), so they really don't care. Now for the direct callers of prandom_bytes_state(), which are solely located in test cases for MTD devices, that is, drivers/mtd/tests/{oobtest.c,pagetest.c,subpagetest.c}: These tests basically fill a test write-vector through prandom_bytes_state() with an a-priori defined seed each time and write that to a MTD device. Later on, they set up a read-vector and read back those blocks from the device. So in the verification phase, the write-vector is being re-setup [ so same seed and prandom_bytes_state() called ], and then memcmp()'ed against the read-vector to check if the data is the same. Akinobu, Lothar and I also tested this patch and it runs through the 3 relevant MTD test cases w/o any errors on the nandsim device (simulator for MTD devs) for x86_64, ppc64, ARM (i.MX28, i.MX53 and i.MX6): # modprobe nandsim first_id_byte=0x20 second_id_byte=0xac \ third_id_byte=0x00 fourth_id_byte=0x15 # modprobe mtd_oobtest dev=0 # modprobe mtd_pagetest dev=0 # modprobe mtd_subpagetest dev=0 We also don't have any users depending directly on a particular result of the PRNG (except the PRNG self-test itself), and that's just fine as it e.g. easily allowed us to do things like upgrading from taus88 to taus113. Signed-off-by: Daniel Borkmann Tested-by: Akinobu Mita Tested-by: Lothar Waßmann Cc: Hannes Frederic Sowa Signed-off-by: David S. 
Miller --- include/linux/random.h | 4 ++-- lib/random32.c | 39 ++++++++++++++++++--------------------- 2 files changed, 20 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/random.h b/include/linux/random.h index 57fbbffd77a0..b05856e16b75 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -26,7 +26,7 @@ unsigned int get_random_int(void); unsigned long randomize_range(unsigned long start, unsigned long end, unsigned long len); u32 prandom_u32(void); -void prandom_bytes(void *buf, int nbytes); +void prandom_bytes(void *buf, size_t nbytes); void prandom_seed(u32 seed); void prandom_reseed_late(void); @@ -35,7 +35,7 @@ struct rnd_state { }; u32 prandom_u32_state(struct rnd_state *state); -void prandom_bytes_state(struct rnd_state *state, void *buf, int nbytes); +void prandom_bytes_state(struct rnd_state *state, void *buf, size_t nbytes); /** * prandom_u32_max - returns a pseudo-random number in interval [0, ep_ro) diff --git a/lib/random32.c b/lib/random32.c index c9b6bf3afe0c..0bee183fa18f 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -37,6 +37,7 @@ #include #include #include +#include #ifdef CONFIG_RANDOM32_SELFTEST static void __init prandom_state_selftest(void); @@ -96,27 +97,23 @@ EXPORT_SYMBOL(prandom_u32); * This is used for pseudo-randomness with no outside seeding. * For more random results, use prandom_bytes(). */ -void prandom_bytes_state(struct rnd_state *state, void *buf, int bytes) +void prandom_bytes_state(struct rnd_state *state, void *buf, size_t bytes) { - unsigned char *p = buf; - int i; - - for (i = 0; i < round_down(bytes, sizeof(u32)); i += sizeof(u32)) { - u32 random = prandom_u32_state(state); - int j; + u8 *ptr = buf; - for (j = 0; j < sizeof(u32); j++) { - p[i + j] = random; - random >>= BITS_PER_BYTE; - } + while (bytes >= sizeof(u32)) { + put_unaligned(prandom_u32_state(state), (u32 *) ptr); + ptr += sizeof(u32); + bytes -= sizeof(u32); } - if (i < bytes) { - u32 random = prandom_u32_state(state); - for (; i < bytes; i++) { - p[i] = random; - random >>= BITS_PER_BYTE; - } + if (bytes > 0) { + u32 rem = prandom_u32_state(state); + do { + *ptr++ = (u8) rem; + bytes--; + rem >>= BITS_PER_BYTE; + } while (bytes > 0); } } EXPORT_SYMBOL(prandom_bytes_state); @@ -126,7 +123,7 @@ EXPORT_SYMBOL(prandom_bytes_state); * @buf: where to copy the pseudo-random bytes to * @bytes: the requested number of bytes */ -void prandom_bytes(void *buf, int bytes) +void prandom_bytes(void *buf, size_t bytes) { struct rnd_state *state = &get_cpu_var(net_rand_state); @@ -137,7 +134,7 @@ EXPORT_SYMBOL(prandom_bytes); static void prandom_warmup(struct rnd_state *state) { - /* Calling RNG ten times to satify recurrence condition */ + /* Calling RNG ten times to satisfy recurrence condition */ prandom_u32_state(state); prandom_u32_state(state); prandom_u32_state(state); @@ -152,7 +149,7 @@ static void prandom_warmup(struct rnd_state *state) static u32 __extract_hwseed(void) { - u32 val = 0; + unsigned int val = 0; (void)(arch_get_random_seed_int(&val) || arch_get_random_int(&val)); @@ -228,7 +225,7 @@ static void __prandom_timer(unsigned long dontcare) prandom_seed(entropy); /* reseed every ~60 seconds, in [40 .. 80) interval with slack */ - expires = 40 + (prandom_u32() % 40); + expires = 40 + prandom_u32_max(40); seed_timer.expires = jiffies + msecs_to_jiffies(expires * MSEC_PER_SEC); add_timer(&seed_timer); -- cgit v1.2.3 From 4798248e4e023170e937a65a1d30fcc52496dd42 Mon Sep 17 00:00:00 2001 From: "David S. 
Miller" Date: Fri, 22 Aug 2014 16:21:53 -0700 Subject: net: Add ops->ndo_xmit_flush() Signed-off-by: David S. Miller --- drivers/net/wan/dlci.c | 2 +- drivers/usb/gadget/function/f_ncm.c | 2 +- include/linux/netdevice.h | 35 +++++++++++++++++++++++++++++++++++ net/atm/mpc.c | 2 +- net/core/dev.c | 5 ++--- net/core/netpoll.c | 3 +-- net/core/pktgen.c | 4 +--- net/packet/af_packet.c | 3 +-- net/sched/sch_teql.c | 3 +-- 9 files changed, 44 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wan/dlci.c b/drivers/net/wan/dlci.c index 43c9960dce1c..81b22a180aad 100644 --- a/drivers/net/wan/dlci.c +++ b/drivers/net/wan/dlci.c @@ -193,7 +193,7 @@ static netdev_tx_t dlci_transmit(struct sk_buff *skb, struct net_device *dev) struct dlci_local *dlp = netdev_priv(dev); if (skb) - dlp->slave->netdev_ops->ndo_start_xmit(skb, dlp->slave); + netdev_start_xmit(skb, dlp->slave); return NETDEV_TX_OK; } diff --git a/drivers/usb/gadget/function/f_ncm.c b/drivers/usb/gadget/function/f_ncm.c index bcdc882cd415..cb5d646db6a7 100644 --- a/drivers/usb/gadget/function/f_ncm.c +++ b/drivers/usb/gadget/function/f_ncm.c @@ -1101,7 +1101,7 @@ static void ncm_tx_tasklet(unsigned long data) /* Only send if data is available. */ if (ncm->skb_tx_data) { ncm->timer_force_tx = true; - ncm->netdev->netdev_ops->ndo_start_xmit(NULL, ncm->netdev); + netdev_start_xmit(NULL, ncm->netdev); ncm->timer_force_tx = false; } } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index eb73444e1bd0..220c50984688 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -782,6 +782,19 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX) * Required can not be NULL. * + * void (*ndo_xmit_flush)(struct net_device *dev, u16 queue); + * A driver implements this function when it wishes to support + * deferred TX queue flushing. The idea is that the expensive + * operation to trigger TX queue processing can be done after + * N calls to ndo_start_xmit rather than being done every single + * time. In this regime ndo_start_xmit will be called one or more + * times, and then a final ndo_xmit_flush call will be made to + * have the driver tell the device about the new pending TX queue + * entries. The kernel keeps track of which queues need flushing + * by monitoring skb->queue_mapping of the packets it submits to + * ndo_start_xmit. This is the queue value that will be passed + * to ndo_xmit_flush. 
+ * * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, * void *accel_priv, select_queue_fallback_t fallback); * Called to decide which queue to when device supports multiple @@ -1005,6 +1018,7 @@ struct net_device_ops { int (*ndo_stop)(struct net_device *dev); netdev_tx_t (*ndo_start_xmit) (struct sk_buff *skb, struct net_device *dev); + void (*ndo_xmit_flush)(struct net_device *dev, u16 queue); u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, void *accel_priv, @@ -3430,6 +3444,27 @@ int __init dev_proc_init(void); #define dev_proc_init() 0 #endif +static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops, + struct sk_buff *skb, struct net_device *dev) +{ + netdev_tx_t ret; + u16 q; + + q = skb->queue_mapping; + ret = ops->ndo_start_xmit(skb, dev); + if (dev_xmit_complete(ret) && ops->ndo_xmit_flush) + ops->ndo_xmit_flush(dev, q); + + return ret; +} + +static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + return __netdev_start_xmit(ops, skb, dev); +} + int netdev_class_create_file_ns(struct class_attribute *class_attr, const void *ns); void netdev_class_remove_file_ns(struct class_attribute *class_attr, diff --git a/net/atm/mpc.c b/net/atm/mpc.c index e8e0e7a8a23d..d662da161e5a 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -599,7 +599,7 @@ static netdev_tx_t mpc_send_packet(struct sk_buff *skb, } non_ip: - return mpc->old_ops->ndo_start_xmit(skb, dev); + return __netdev_start_xmit(mpc->old_ops, skb, dev); } static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg) diff --git a/net/core/dev.c b/net/core/dev.c index b6a718ec11c1..26d296c2447c 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2602,7 +2602,6 @@ EXPORT_SYMBOL(netif_skb_features); int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) { - const struct net_device_ops *ops = dev->netdev_ops; int rc = NETDEV_TX_OK; unsigned int skb_len; @@ -2667,7 +2666,7 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, skb_len = skb->len; trace_net_dev_start_xmit(skb, dev); - rc = ops->ndo_start_xmit(skb, dev); + rc = netdev_start_xmit(skb, dev); trace_net_dev_xmit(skb, rc, dev, skb_len); if (rc == NETDEV_TX_OK) txq_trans_update(txq); @@ -2686,7 +2685,7 @@ gso: skb_len = nskb->len; trace_net_dev_start_xmit(nskb, dev); - rc = ops->ndo_start_xmit(nskb, dev); + rc = netdev_start_xmit(nskb, dev); trace_net_dev_xmit(nskb, rc, dev, skb_len); if (unlikely(rc != NETDEV_TX_OK)) { if (rc & ~NETDEV_TX_MASK) diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 907fb5e36c02..a5ad06828d67 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -72,7 +72,6 @@ module_param(carrier_timeout, uint, 0644); static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq) { - const struct net_device_ops *ops = dev->netdev_ops; int status = NETDEV_TX_OK; netdev_features_t features; @@ -92,7 +91,7 @@ static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev, skb->vlan_tci = 0; } - status = ops->ndo_start_xmit(skb, dev); + status = netdev_start_xmit(skb, dev); if (status == NETDEV_TX_OK) txq_trans_update(txq); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 8b849ddfef2e..83e2b4b19eb7 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3285,8 +3285,6 @@ static void pktgen_wait_for_skb(struct pktgen_dev *pkt_dev) static void pktgen_xmit(struct pktgen_dev 
*pkt_dev) { struct net_device *odev = pkt_dev->odev; - netdev_tx_t (*xmit)(struct sk_buff *, struct net_device *) - = odev->netdev_ops->ndo_start_xmit; struct netdev_queue *txq; u16 queue_map; int ret; @@ -3339,7 +3337,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) goto unlock; } atomic_inc(&(pkt_dev->skb->users)); - ret = (*xmit)(pkt_dev->skb, odev); + ret = netdev_start_xmit(pkt_dev->skb, odev); switch (ret) { case NETDEV_TX_OK: diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 93896d2092f6..0dfa990d4eaa 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -240,7 +240,6 @@ static void __fanout_link(struct sock *sk, struct packet_sock *po); static int packet_direct_xmit(struct sk_buff *skb) { struct net_device *dev = skb->dev; - const struct net_device_ops *ops = dev->netdev_ops; netdev_features_t features; struct netdev_queue *txq; int ret = NETDEV_TX_BUSY; @@ -262,7 +261,7 @@ static int packet_direct_xmit(struct sk_buff *skb) HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_drv_stopped(txq)) { - ret = ops->ndo_start_xmit(skb, dev); + ret = netdev_start_xmit(skb, dev); if (ret == NETDEV_TX_OK) txq_trans_update(txq); } diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index bd33793b527e..64cd93ca8104 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -301,7 +301,6 @@ restart: do { struct net_device *slave = qdisc_dev(q); struct netdev_queue *slave_txq = netdev_get_tx_queue(slave, 0); - const struct net_device_ops *slave_ops = slave->netdev_ops; if (slave_txq->qdisc_sleeping != q) continue; @@ -317,7 +316,7 @@ restart: unsigned int length = qdisc_pkt_len(skb); if (!netif_xmit_frozen_or_stopped(slave_txq) && - slave_ops->ndo_start_xmit(skb, slave) == NETDEV_TX_OK) { + netdev_start_xmit(skb, slave) == NETDEV_TX_OK) { txq_trans_update(slave_txq); __netif_tx_unlock(slave_txq); master->slaves = NEXT_SLAVE(q); -- cgit v1.2.3 From 2ee507c472939db4b146d545352b8a7c79ef47f8 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Thu, 31 Jul 2014 10:29:48 -0700 Subject: sched: Add function single_task_running to let a task check if it is the only task running on a cpu This function will help an async task processing batched jobs from a workqueue decide if it wants to keep processing more chunks of batched work that can be delayed, or to accumulate more work for more efficient batched processing later. If no other tasks are running on the cpu, the batching process can take advantage of the available cpu cycles and decide to continue processing the existing accumulated work to minimize delay; otherwise it will yield. 
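A sketch of the intended usage pattern follows; struct my_batch, batch_has_work() and process_one_chunk() are hypothetical placeholders, only single_task_running() and cond_resched() are real kernel interfaces:

#include <linux/sched.h>

struct my_batch;
extern bool batch_has_work(struct my_batch *batch);
extern void process_one_chunk(struct my_batch *batch);

/* Hypothetical batch consumer: keep draining deferred work only while
 * this task has the CPU to itself; otherwise stop and let the remaining
 * chunks be processed later. */
static void drain_deferred_work(struct my_batch *batch)
{
	while (batch_has_work(batch)) {
		process_one_chunk(batch);

		if (!single_task_running())
			break;		/* someone else wants the CPU */

		cond_resched();
	}
}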
Signed-off-by: Tim Chen Signed-off-by: Herbert Xu --- include/linux/sched.h | 1 + kernel/sched/core.c | 12 ++++++++++++ 2 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5c2c885ee52b..e6d2c056d8e0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -167,6 +167,7 @@ extern int nr_threads; DECLARE_PER_CPU(unsigned long, process_counts); extern int nr_processes(void); extern unsigned long nr_running(void); +extern bool single_task_running(void); extern unsigned long nr_iowait(void); extern unsigned long nr_iowait_cpu(int cpu); extern void get_iowait_load(unsigned long *nr_waiters, unsigned long *load); diff --git a/kernel/sched/core.c b/kernel/sched/core.c index ec1a286684a5..59965ec0b7de 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2366,6 +2366,18 @@ unsigned long nr_running(void) return sum; } +/* + * Check if only the current task is running on the cpu. + */ +bool single_task_running(void) +{ + if (cpu_rq(smp_processor_id())->nr_running == 1) + return true; + else + return false; +} +EXPORT_SYMBOL(single_task_running); + unsigned long long nr_context_switches(void) { int i; -- cgit v1.2.3 From 48ea526a6877d605c961aa37fae33f3227b29424 Mon Sep 17 00:00:00 2001 From: Amir Vadai Date: Mon, 25 Aug 2014 16:06:53 +0300 Subject: net/mlx4: Use is_kdump_kernel() to detect kdump kernel Use is_kdump_kernel() to detect kdump kernel, instead of reset_devices. Signed-off-by: Amir Vadai Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 071f6b234604..783dd099abd1 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -38,6 +38,7 @@ #include #include #include +#include #include @@ -1275,7 +1276,7 @@ int mlx4_mr_rereg_mem_write(struct mlx4_dev *dev, struct mlx4_mr *mr, /* Returns true if running in low memory profile (kdump kernel) */ static inline bool mlx4_low_memory_profile(void) { - return reset_devices; + return is_kdump_kernel(); } #endif /* MLX4_DEVICE_H */ -- cgit v1.2.3 From 0b725a2ca61bedc33a2a63d0451d528b268cf975 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 25 Aug 2014 15:51:53 -0700 Subject: net: Remove ndo_xmit_flush netdev operation, use signalling instead. As reported by Jesper Dangaard Brouer, for high packet rates the overhead of having another indirect call in the TX path is non-trivial. There is the indirect call itself, and then there is all of the reloading of the state to refetch the tail pointer value and then write the device register. Move to a more passive scheme, which requires very light modifications to the device drivers. The signal is a new skb->xmit_more value, if it is non-zero it means that more SKBs are pending to be transmitted on the same queue as the current SKB. And therefore, the driver may elide the tail pointer update. Right now skb->xmit_more is always zero. Signed-off-by: David S. 
Miller --- drivers/net/ethernet/intel/igb/igb_main.c | 36 +++++++++++-------------------- drivers/net/virtio_net.c | 12 +++-------- include/linux/netdevice.h | 25 ++------------------- include/linux/skbuff.h | 2 ++ 4 files changed, 19 insertions(+), 56 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index b9c020a05fb8..89c29b40d61c 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -136,7 +136,6 @@ static void igb_update_phy_info(unsigned long); static void igb_watchdog(unsigned long); static void igb_watchdog_task(struct work_struct *); static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *); -static void igb_xmit_flush(struct net_device *netdev, u16 queue); static struct rtnl_link_stats64 *igb_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats); static int igb_change_mtu(struct net_device *, int); @@ -2076,7 +2075,6 @@ static const struct net_device_ops igb_netdev_ops = { .ndo_open = igb_open, .ndo_stop = igb_close, .ndo_start_xmit = igb_xmit_frame, - .ndo_xmit_flush = igb_xmit_flush, .ndo_get_stats64 = igb_get_stats64, .ndo_set_rx_mode = igb_set_rx_mode, .ndo_set_mac_address = igb_set_mac, @@ -4917,6 +4915,14 @@ static void igb_tx_map(struct igb_ring *tx_ring, tx_ring->next_to_use = i; + if (!skb->xmit_more) { + writel(i, tx_ring->tail); + + /* we need this if more than one processor can write to our tail + * at a time, it synchronizes IO on IA64/Altix systems + */ + mmiowb(); + } return; dma_error: @@ -5052,20 +5058,17 @@ out_drop: return NETDEV_TX_OK; } -static struct igb_ring *__igb_tx_queue_mapping(struct igb_adapter *adapter, unsigned int r_idx) +static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter, + struct sk_buff *skb) { + unsigned int r_idx = skb->queue_mapping; + if (r_idx >= adapter->num_tx_queues) r_idx = r_idx % adapter->num_tx_queues; return adapter->tx_ring[r_idx]; } -static inline struct igb_ring *igb_tx_queue_mapping(struct igb_adapter *adapter, - struct sk_buff *skb) -{ - return __igb_tx_queue_mapping(adapter, skb->queue_mapping); -} - static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, struct net_device *netdev) { @@ -5094,21 +5097,6 @@ static netdev_tx_t igb_xmit_frame(struct sk_buff *skb, return igb_xmit_frame_ring(skb, igb_tx_queue_mapping(adapter, skb)); } -static void igb_xmit_flush(struct net_device *netdev, u16 queue) -{ - struct igb_adapter *adapter = netdev_priv(netdev); - struct igb_ring *tx_ring; - - tx_ring = __igb_tx_queue_mapping(adapter, queue); - - writel(tx_ring->next_to_use, tx_ring->tail); - - /* we need this if more than one processor can write to our tail - * at a time, it synchronizes IO on IA64/Altix systems - */ - mmiowb(); -} - /** * igb_tx_timeout - Respond to a Tx Hang * @netdev: network interface device structure diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 62421086d3e6..f0c2824f5e0f 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -953,15 +953,10 @@ static netdev_tx_t start_xmit(struct sk_buff *skb, struct net_device *dev) } } - return NETDEV_TX_OK; -} + if (!skb->xmit_more) + virtqueue_kick(sq->vq); -static void xmit_flush(struct net_device *dev, u16 qnum) -{ - struct virtnet_info *vi = netdev_priv(dev); - struct send_queue *sq = &vi->sq[qnum]; - - virtqueue_kick(sq->vq); + return NETDEV_TX_OK; } /* @@ -1393,7 +1388,6 @@ static const struct net_device_ops virtnet_netdev = { 
.ndo_open = virtnet_open, .ndo_stop = virtnet_close, .ndo_start_xmit = start_xmit, - .ndo_xmit_flush = xmit_flush, .ndo_validate_addr = eth_validate_addr, .ndo_set_mac_address = virtnet_set_mac_address, .ndo_set_rx_mode = virtnet_set_rx_mode, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 220c50984688..039b23786c22 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -782,19 +782,6 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * (can also return NETDEV_TX_LOCKED iff NETIF_F_LLTX) * Required can not be NULL. * - * void (*ndo_xmit_flush)(struct net_device *dev, u16 queue); - * A driver implements this function when it wishes to support - * deferred TX queue flushing. The idea is that the expensive - * operation to trigger TX queue processing can be done after - * N calls to ndo_start_xmit rather than being done every single - * time. In this regime ndo_start_xmit will be called one or more - * times, and then a final ndo_xmit_flush call will be made to - * have the driver tell the device about the new pending TX queue - * entries. The kernel keeps track of which queues need flushing - * by monitoring skb->queue_mapping of the packets it submits to - * ndo_start_xmit. This is the queue value that will be passed - * to ndo_xmit_flush. - * * u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, * void *accel_priv, select_queue_fallback_t fallback); * Called to decide which queue to when device supports multiple @@ -1018,7 +1005,6 @@ struct net_device_ops { int (*ndo_stop)(struct net_device *dev); netdev_tx_t (*ndo_start_xmit) (struct sk_buff *skb, struct net_device *dev); - void (*ndo_xmit_flush)(struct net_device *dev, u16 queue); u16 (*ndo_select_queue)(struct net_device *dev, struct sk_buff *skb, void *accel_priv, @@ -3447,15 +3433,8 @@ int __init dev_proc_init(void); static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops, struct sk_buff *skb, struct net_device *dev) { - netdev_tx_t ret; - u16 q; - - q = skb->queue_mapping; - ret = ops->ndo_start_xmit(skb, dev); - if (dev_xmit_complete(ret) && ops->ndo_xmit_flush) - ops->ndo_xmit_flush(dev, q); - - return ret; + skb->xmit_more = 0; + return ops->ndo_start_xmit(skb, dev); } static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 18ddf9684a27..9b3802a197a8 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -452,6 +452,7 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, * @tc_verd: traffic control verdict * @hash: the packet hash * @queue_mapping: Queue mapping for multiqueue devices + * @xmit_more: More SKBs are pending for this queue * @ndisc_nodetype: router type (from link layer) * @ooo_okay: allow the mapping of a socket to a queue to be changed * @l4_hash: indicate hash is a canonical 4-tuple hash over transport @@ -558,6 +559,7 @@ struct sk_buff { __u16 queue_mapping; kmemcheck_bitfield_begin(flags2); + __u8 xmit_more:1; #ifdef CONFIG_IPV6_NDISC_NODETYPE __u8 ndisc_nodetype:2; #endif -- cgit v1.2.3 From b4bbb107d73bbc0d92c9ae7fd8e69580aa9381e7 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 27 Jun 2014 11:56:58 +0200 Subject: dma-mapping: Provide write-combine allocations Provide an implementation for dma_{alloc,free,mmap}_writecombine() when the architecture supports DMA attributes. 
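A typical consumer would look roughly like the sketch below; the device pointer, buffer size and surrounding driver glue are assumptions, not part of this patch:

#include <linux/dma-mapping.h>
#include <linux/gfp.h>

#define MY_BUF_SIZE	(1024 * 1024)	/* illustrative size */

static void *wc_cpu_addr;
static dma_addr_t wc_dma_addr;

static int my_alloc_wc_buffer(struct device *dev)
{
	/* Returns a kernel mapping with write-combine semantics where the
	 * architecture implements DMA attributes. */
	wc_cpu_addr = dma_alloc_writecombine(dev, MY_BUF_SIZE,
					     &wc_dma_addr, GFP_KERNEL);
	return wc_cpu_addr ? 0 : -ENOMEM;
}

static void my_free_wc_buffer(struct device *dev)
{
	dma_free_writecombine(dev, MY_BUF_SIZE, wc_cpu_addr, wc_dma_addr);
}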
Signed-off-by: Thierry Reding Acked-by: Arnd Bergmann Signed-off-by: Marek Szyprowski --- arch/arm/include/asm/dma-mapping.h | 16 ---------------- include/asm-generic/dma-mapping-common.h | 8 -------- include/linux/dma-mapping.h | 26 ++++++++++++++++++++++++++ 3 files changed, 26 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index c45b61a4b4a5..85738b200023 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -265,22 +265,6 @@ extern int arm_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, struct dma_attrs *attrs); -static inline void *dma_alloc_writecombine(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flag) -{ - DEFINE_DMA_ATTRS(attrs); - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); - return dma_alloc_attrs(dev, size, dma_handle, flag, &attrs); -} - -static inline void dma_free_writecombine(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_handle) -{ - DEFINE_DMA_ATTRS(attrs); - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); - return dma_free_attrs(dev, size, cpu_addr, dma_handle, &attrs); -} - /* * This can be called during early boot to increase the size of the atomic * coherent DMA pool above the default value of 256KiB. It must be called diff --git a/include/asm-generic/dma-mapping-common.h b/include/asm-generic/dma-mapping-common.h index de8bf89940f8..d137431bf26f 100644 --- a/include/asm-generic/dma-mapping-common.h +++ b/include/asm-generic/dma-mapping-common.h @@ -205,14 +205,6 @@ dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, #define dma_mmap_coherent(d, v, c, h, s) dma_mmap_attrs(d, v, c, h, s, NULL) -static inline int dma_mmap_writecombine(struct device *dev, struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size) -{ - DEFINE_DMA_ATTRS(attrs); - dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); - return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, &attrs); -} - int dma_common_get_sgtable(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size); diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 931b70986272..d5d388160f42 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -263,6 +263,32 @@ struct dma_attrs; #define dma_unmap_sg_attrs(dev, sgl, nents, dir, attrs) \ dma_unmap_sg(dev, sgl, nents, dir) +#else +static inline void *dma_alloc_writecombine(struct device *dev, size_t size, + dma_addr_t *dma_addr, gfp_t gfp) +{ + DEFINE_DMA_ATTRS(attrs); + dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + return dma_alloc_attrs(dev, size, dma_addr, gfp, &attrs); +} + +static inline void dma_free_writecombine(struct device *dev, size_t size, + void *cpu_addr, dma_addr_t dma_addr) +{ + DEFINE_DMA_ATTRS(attrs); + dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + return dma_free_attrs(dev, size, cpu_addr, dma_addr, &attrs); +} + +static inline int dma_mmap_writecombine(struct device *dev, + struct vm_area_struct *vma, + void *cpu_addr, dma_addr_t dma_addr, + size_t size) +{ + DEFINE_DMA_ATTRS(attrs); + dma_set_attr(DMA_ATTR_WRITE_COMBINE, &attrs); + return dma_mmap_attrs(dev, vma, cpu_addr, dma_addr, size, &attrs); +} #endif /* CONFIG_HAVE_DMA_ATTRS */ #ifdef CONFIG_NEED_DMA_MAP_STATE -- cgit v1.2.3 From 50d8a189013cef83eef771c45787cee68ecdf8fe Mon Sep 17 00:00:00 2001 From: "Raymond L. 
Rivera" Date: Thu, 24 Jul 2014 02:39:45 -0700 Subject: linux/pagemap.h: Fixed a typo in a code comment. Corrected a minor typo in a code comment where 'be' was missing. Signed-off-by: Raymond L. Rivera Signed-off-by: Jiri Kosina --- include/linux/pagemap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index e1474ae18c88..bf657ff3208c 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -96,7 +96,7 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) } /* - * The page cache can done in larger chunks than + * The page cache can be done in larger chunks than * one page, because it allows for more efficient * throughput (it can then be mapped into user * space in smaller chunks for same flexibility). -- cgit v1.2.3 From 170fd0b1f6108b48df4369afa0ee29a83e922748 Mon Sep 17 00:00:00 2001 From: Andrei Otcheretianski Date: Wed, 30 Jul 2014 14:36:18 +0300 Subject: ieee80211: Support parsing TPC report element in action frames TPC report element is contained in spectrum management's tpc report action frames and in radio measurement's link measurement report action frames. Add a function which checks whether an action frame contains this element. This may be needed by the drivers in order to set the correct tx power value in these frames. Signed-off-by: Andrei Otcheretianski Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 65 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 63ab3873c5ed..8018c915ee63 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -838,6 +838,16 @@ enum ieee80211_vht_opmode_bits { #define WLAN_SA_QUERY_TR_ID_LEN 2 +/** + * struct ieee80211_tpc_report_ie + * + * This structure refers to "TPC Report element" + */ +struct ieee80211_tpc_report_ie { + u8 tx_power; + u8 link_margin; +} __packed; + struct ieee80211_mgmt { __le16 frame_control; __le16 duration; @@ -973,6 +983,13 @@ struct ieee80211_mgmt { u8 action_code; u8 operating_mode; } __packed vht_opmode_notif; + struct { + u8 action_code; + u8 dialog_token; + u8 tpc_elem_id; + u8 tpc_elem_length; + struct ieee80211_tpc_report_ie tpc; + } __packed tpc_report; } u; } __packed action; } u; @@ -1865,6 +1882,7 @@ enum ieee80211_category { WLAN_CATEGORY_DLS = 2, WLAN_CATEGORY_BACK = 3, WLAN_CATEGORY_PUBLIC = 4, + WLAN_CATEGORY_RADIO_MEASUREMENT = 5, WLAN_CATEGORY_HT = 7, WLAN_CATEGORY_SA_QUERY = 8, WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION = 9, @@ -2378,4 +2396,51 @@ static inline bool ieee80211_check_tim(const struct ieee80211_tim_ie *tim, #define TU_TO_JIFFIES(x) (usecs_to_jiffies((x) * 1024)) #define TU_TO_EXP_TIME(x) (jiffies + TU_TO_JIFFIES(x)) +/** + * ieee80211_action_contains_tpc - checks if the frame contains TPC element + * @skb: the skb containing the frame, length will be checked + * + * This function checks if it's either TPC report action frame or Link + * Measurement report action frame as defined in IEEE Std. 802.11-2012 8.5.2.5 + * and 8.5.7.5 accordingly. 
+ */ +static inline bool ieee80211_action_contains_tpc(struct sk_buff *skb) +{ + struct ieee80211_mgmt *mgmt = (void *)skb->data; + + if (!ieee80211_is_action(mgmt->frame_control)) + return false; + + if (skb->len < IEEE80211_MIN_ACTION_SIZE + + sizeof(mgmt->u.action.u.tpc_report)) + return false; + + /* + * TPC report - check that: + * category = 0 (Spectrum Management) or 5 (Radio Measurement) + * spectrum management action = 3 (TPC/Link Measurement report) + * TPC report EID = 35 + * TPC report element length = 2 + * + * The spectrum management's tpc_report struct is used here both for + * parsing tpc_report and radio measurement's link measurement report + * frame, since the relevant part is identical in both frames. + */ + if (mgmt->u.action.category != WLAN_CATEGORY_SPECTRUM_MGMT && + mgmt->u.action.category != WLAN_CATEGORY_RADIO_MEASUREMENT) + return false; + + /* both spectrum mgmt and link measurement have same action code */ + if (mgmt->u.action.u.tpc_report.action_code != + WLAN_ACTION_SPCT_TPC_RPRT) + return false; + + if (mgmt->u.action.u.tpc_report.tpc_elem_id != WLAN_EID_TPC_REPORT || + mgmt->u.action.u.tpc_report.tpc_elem_length != + sizeof(struct ieee80211_tpc_report_ie)) + return false; + + return true; +} + #endif /* LINUX_IEEE80211_H */ -- cgit v1.2.3 From 4a32fea9d78f2d2315c0072757b197d5a304dc8b Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:27 -0500 Subject: scheduler: Replace __get_cpu_var with this_cpu_ptr Convert all uses of __get_cpu_var for address calculation to use this_cpu_ptr instead. [Uses of __get_cpu_var with cpumask_var_t are no longer handled by this patch] Cc: Peter Zijlstra Acked-by: Ingo Molnar Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- include/linux/kernel_stat.h | 4 ++-- kernel/events/callchain.c | 4 ++-- kernel/events/core.c | 24 ++++++++++++------------ kernel/sched/sched.h | 4 ++-- kernel/taskstats.c | 2 +- kernel/time/tick-sched.c | 4 ++-- kernel/user-return-notifier.c | 4 ++-- 7 files changed, 23 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel_stat.h b/include/linux/kernel_stat.h index ecbc52f9ff77..8422b4ed6882 100644 --- a/include/linux/kernel_stat.h +++ b/include/linux/kernel_stat.h @@ -44,8 +44,8 @@ DECLARE_PER_CPU(struct kernel_stat, kstat); DECLARE_PER_CPU(struct kernel_cpustat, kernel_cpustat); /* Must have preemption disabled for this to be meaningful. 
*/ -#define kstat_this_cpu (&__get_cpu_var(kstat)) -#define kcpustat_this_cpu (&__get_cpu_var(kernel_cpustat)) +#define kstat_this_cpu this_cpu_ptr(&kstat) +#define kcpustat_this_cpu this_cpu_ptr(&kernel_cpustat) #define kstat_cpu(cpu) per_cpu(kstat, cpu) #define kcpustat_cpu(cpu) per_cpu(kernel_cpustat, cpu) diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index 97b67df8fbfe..c4f63e68a35c 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -137,7 +137,7 @@ static struct perf_callchain_entry *get_callchain_entry(int *rctx) int cpu; struct callchain_cpus_entries *entries; - *rctx = get_recursion_context(__get_cpu_var(callchain_recursion)); + *rctx = get_recursion_context(this_cpu_ptr(callchain_recursion)); if (*rctx == -1) return NULL; @@ -153,7 +153,7 @@ static struct perf_callchain_entry *get_callchain_entry(int *rctx) static void put_callchain_entry(int rctx) { - put_recursion_context(__get_cpu_var(callchain_recursion), rctx); + put_recursion_context(this_cpu_ptr(callchain_recursion), rctx); } struct perf_callchain_entry * diff --git a/kernel/events/core.c b/kernel/events/core.c index 1cf24b3e42ec..4d44e40a0483 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -239,7 +239,7 @@ static void perf_duration_warn(struct irq_work *w) u64 avg_local_sample_len; u64 local_samples_len; - local_samples_len = __get_cpu_var(running_sample_length); + local_samples_len = __this_cpu_read(running_sample_length); avg_local_sample_len = local_samples_len/NR_ACCUMULATED_SAMPLES; printk_ratelimited(KERN_WARNING @@ -261,10 +261,10 @@ void perf_sample_event_took(u64 sample_len_ns) return; /* decay the counter by 1 average sample */ - local_samples_len = __get_cpu_var(running_sample_length); + local_samples_len = __this_cpu_read(running_sample_length); local_samples_len -= local_samples_len/NR_ACCUMULATED_SAMPLES; local_samples_len += sample_len_ns; - __get_cpu_var(running_sample_length) = local_samples_len; + __this_cpu_write(running_sample_length, local_samples_len); /* * note: this will be biased artifically low until we have @@ -877,7 +877,7 @@ static DEFINE_PER_CPU(struct list_head, rotation_list); static void perf_pmu_rotate_start(struct pmu *pmu) { struct perf_cpu_context *cpuctx = this_cpu_ptr(pmu->pmu_cpu_context); - struct list_head *head = &__get_cpu_var(rotation_list); + struct list_head *head = this_cpu_ptr(&rotation_list); WARN_ON(!irqs_disabled()); @@ -2389,7 +2389,7 @@ void __perf_event_task_sched_out(struct task_struct *task, * to check if we have to switch out PMU state. * cgroup event are system-wide mode only */ - if (atomic_read(&__get_cpu_var(perf_cgroup_events))) + if (atomic_read(this_cpu_ptr(&perf_cgroup_events))) perf_cgroup_sched_out(task, next); } @@ -2632,11 +2632,11 @@ void __perf_event_task_sched_in(struct task_struct *prev, * to check if we have to switch in PMU state. 
* cgroup event are system-wide mode only */ - if (atomic_read(&__get_cpu_var(perf_cgroup_events))) + if (atomic_read(this_cpu_ptr(&perf_cgroup_events))) perf_cgroup_sched_in(prev, task); /* check for system-wide branch_stack events */ - if (atomic_read(&__get_cpu_var(perf_branch_stack_events))) + if (atomic_read(this_cpu_ptr(&perf_branch_stack_events))) perf_branch_stack_sched_in(prev, task); } @@ -2891,7 +2891,7 @@ bool perf_event_can_stop_tick(void) void perf_event_task_tick(void) { - struct list_head *head = &__get_cpu_var(rotation_list); + struct list_head *head = this_cpu_ptr(&rotation_list); struct perf_cpu_context *cpuctx, *tmp; struct perf_event_context *ctx; int throttled; @@ -5671,7 +5671,7 @@ static void do_perf_sw_event(enum perf_type_id type, u32 event_id, struct perf_sample_data *data, struct pt_regs *regs) { - struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); + struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); struct perf_event *event; struct hlist_head *head; @@ -5690,7 +5690,7 @@ end: int perf_swevent_get_recursion_context(void) { - struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); + struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); return get_recursion_context(swhash->recursion); } @@ -5698,7 +5698,7 @@ EXPORT_SYMBOL_GPL(perf_swevent_get_recursion_context); inline void perf_swevent_put_recursion_context(int rctx) { - struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); + struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); put_recursion_context(swhash->recursion, rctx); } @@ -5727,7 +5727,7 @@ static void perf_swevent_read(struct perf_event *event) static int perf_swevent_add(struct perf_event *event, int flags) { - struct swevent_htable *swhash = &__get_cpu_var(swevent_htable); + struct swevent_htable *swhash = this_cpu_ptr(&swevent_htable); struct hw_perf_event *hwc = &event->hw; struct hlist_head *head; diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 579712f4e9d5..77d92f8130e8 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -650,10 +650,10 @@ static inline int cpu_of(struct rq *rq) DECLARE_PER_CPU(struct rq, runqueues); #define cpu_rq(cpu) (&per_cpu(runqueues, (cpu))) -#define this_rq() (&__get_cpu_var(runqueues)) +#define this_rq() this_cpu_ptr(&runqueues) #define task_rq(p) cpu_rq(task_cpu(p)) #define cpu_curr(cpu) (cpu_rq(cpu)->curr) -#define raw_rq() (&__raw_get_cpu_var(runqueues)) +#define raw_rq() raw_cpu_ptr(&runqueues) static inline u64 rq_clock(struct rq *rq) { diff --git a/kernel/taskstats.c b/kernel/taskstats.c index 13d2f7cd65db..b312fcc73024 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -638,7 +638,7 @@ void taskstats_exit(struct task_struct *tsk, int group_dead) fill_tgid_exit(tsk); } - listeners = __this_cpu_ptr(&listener_array); + listeners = raw_cpu_ptr(&listener_array); if (list_empty(&listeners->list)) return; diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 73f90932282b..3cadc112519f 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -924,7 +924,7 @@ static void tick_nohz_account_idle_ticks(struct tick_sched *ts) */ void tick_nohz_idle_exit(void) { - struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); + struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); ktime_t now; local_irq_disable(); @@ -1041,7 +1041,7 @@ static void tick_nohz_kick_tick(struct tick_sched *ts, ktime_t now) static inline void tick_nohz_irq_enter(void) { - struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched); + 
struct tick_sched *ts = this_cpu_ptr(&tick_cpu_sched); ktime_t now; if (!ts->idle_active && !ts->tick_stopped) diff --git a/kernel/user-return-notifier.c b/kernel/user-return-notifier.c index 394f70b17162..9586b670a5b2 100644 --- a/kernel/user-return-notifier.c +++ b/kernel/user-return-notifier.c @@ -14,7 +14,7 @@ static DEFINE_PER_CPU(struct hlist_head, return_notifier_list); void user_return_notifier_register(struct user_return_notifier *urn) { set_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY); - hlist_add_head(&urn->link, &__get_cpu_var(return_notifier_list)); + hlist_add_head(&urn->link, this_cpu_ptr(&return_notifier_list)); } EXPORT_SYMBOL_GPL(user_return_notifier_register); @@ -25,7 +25,7 @@ EXPORT_SYMBOL_GPL(user_return_notifier_register); void user_return_notifier_unregister(struct user_return_notifier *urn) { hlist_del(&urn->link); - if (hlist_empty(&__get_cpu_var(return_notifier_list))) + if (hlist_empty(this_cpu_ptr(&return_notifier_list))) clear_tsk_thread_flag(current, TIF_USER_RETURN_NOTIFY); } EXPORT_SYMBOL_GPL(user_return_notifier_unregister); -- cgit v1.2.3 From 47405a253da4d8ca4b18ad537423083fdd790440 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Sun, 17 Aug 2014 12:30:56 -0500 Subject: percpu: Remove __this_cpu_ptr The __this_cpu_ptr macro is no longer in use so drop it. Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- include/linux/percpu-defs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index cfd56046ecec..420032d41d27 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -257,9 +257,6 @@ do { \ #define __raw_get_cpu_var(var) (*raw_cpu_ptr(&(var))) #define __get_cpu_var(var) (*this_cpu_ptr(&(var))) -/* keep until we have removed all uses of __this_cpu_ptr */ -#define __this_cpu_ptr(ptr) raw_cpu_ptr(ptr) - /* * Must be an lvalue. Since @var must be a simple identifier, * we force a syntax error here if it isn't. -- cgit v1.2.3 From bf3baca6c54ce8a2f51687296f868dfe20d33f13 Mon Sep 17 00:00:00 2001 From: James Ban Date: Wed, 27 Aug 2014 11:47:07 +0900 Subject: regulator: da9211: support device tree Add device tree support for the DA9211/DA9213. Signed-off-by: James Ban Signed-off-by: Mark Brown --- .../devicetree/bindings/regulator/da9211.txt | 63 ++++++++++++++++ drivers/regulator/da9211-regulator.c | 85 ++++++++++++++++++++-- include/linux/regulator/da9211.h | 2 +- 3 files changed, 142 insertions(+), 8 deletions(-) create mode 100644 Documentation/devicetree/bindings/regulator/da9211.txt (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/regulator/da9211.txt b/Documentation/devicetree/bindings/regulator/da9211.txt new file mode 100644 index 000000000000..240019a82f9a --- /dev/null +++ b/Documentation/devicetree/bindings/regulator/da9211.txt @@ -0,0 +1,63 @@ +* Dialog Semiconductor DA9211/DA9213 Voltage Regulator +
+Required properties: +- compatible: "dlg,da9211" or "dlg,da9213". +- reg: I2C slave address, usually 0x68. +- interrupts: the interrupt outputs of the controller +- regulators: A node that houses a sub-node for each regulator within the + device. Each sub-node is identified using the node's name, with valid + values listed below. The content of each sub-node is defined by the + standard binding for regulators; see regulator.txt. + BUCKA and BUCKB.
+ +Optional properties: +- Any optional property defined in regulator.txt + +Example 1) DA9211 + + pmic: da9211@68 { + compatible = "dlg,da9211"; + reg = <0x68>; + interrupts = <3 27>; + + regulators { + BUCKA { + regulator-name = "VBUCKA"; + regulator-min-microvolt = < 300000>; + regulator-max-microvolt = <1570000>; + regulator-min-microamp = <2000000>; + regulator-max-microamp = <5000000>; + }; + BUCKB { + regulator-name = "VBUCKB"; + regulator-min-microvolt = < 300000>; + regulator-max-microvolt = <1570000>; + regulator-min-microamp = <2000000>; + regulator-max-microamp = <5000000>; + }; + }; + }; + +Example 2) DA9213 + pmic: da9213@68 { + compatible = "dlg,da9213"; + reg = <0x68>; + interrupts = <3 27>; + + regulators { + BUCKA { + regulator-name = "VBUCKA"; + regulator-min-microvolt = < 300000>; + regulator-max-microvolt = <1570000>; + regulator-min-microamp = <3000000>; + regulator-max-microamp = <6000000>; + }; + BUCKB { + regulator-name = "VBUCKB"; + regulator-min-microvolt = < 300000>; + regulator-max-microvolt = <1570000>; + regulator-min-microamp = <3000000>; + regulator-max-microamp = <6000000>; + }; + }; + }; diff --git a/drivers/regulator/da9211-regulator.c b/drivers/regulator/da9211-regulator.c index a26f1d283c57..5aabbac1b524 100644 --- a/drivers/regulator/da9211-regulator.c +++ b/drivers/regulator/da9211-regulator.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include "da9211-regulator.h" @@ -236,6 +237,59 @@ static struct regulator_desc da9211_regulators[] = { DA9211_BUCK(BUCKB), }; +#ifdef CONFIG_OF +static struct of_regulator_match da9211_matches[] = { + [DA9211_ID_BUCKA] = { .name = "BUCKA" }, + [DA9211_ID_BUCKB] = { .name = "BUCKB" }, + }; + +static struct da9211_pdata *da9211_parse_regulators_dt( + struct device *dev) +{ + struct da9211_pdata *pdata; + struct device_node *node; + int i, num, n; + + node = of_get_child_by_name(dev->of_node, "regulators"); + if (!node) { + dev_err(dev, "regulators node not found\n"); + return ERR_PTR(-ENODEV); + } + + num = of_regulator_match(dev, node, da9211_matches, + ARRAY_SIZE(da9211_matches)); + of_node_put(node); + if (num < 0) { + dev_err(dev, "Failed to match regulators\n"); + return ERR_PTR(-EINVAL); + } + + pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL); + if (!pdata) + return ERR_PTR(-ENOMEM); + + pdata->num_buck = num; + + n = 0; + for (i = 0; i < ARRAY_SIZE(da9211_matches); i++) { + if (!da9211_matches[i].init_data) + continue; + + pdata->init_data[n] = da9211_matches[i].init_data; + + n++; + }; + + return pdata; +} +#else +static struct da9211_pdata *da9211_parse_regulators_dt( + struct device *dev) +{ + return ERR_PTR(-ENODEV); +} +#endif + static irqreturn_t da9211_irq_handler(int irq, void *data) { struct da9211 *chip = data; @@ -306,7 +360,7 @@ static int da9211_regulator_init(struct da9211 *chip) } for (i = 0; i < chip->num_regulator; i++) { - config.init_data = &(chip->pdata->init_data[i]); + config.init_data = chip->pdata->init_data[i]; config.dev = chip->dev; config.driver_data = chip; config.regmap = chip->regmap; @@ -332,6 +386,21 @@ static int da9211_regulator_init(struct da9211 *chip) return 0; } + +static const struct i2c_device_id da9211_i2c_id[] = { + {"da9211", DA9211}, + {"da9213", DA9213}, + {}, +}; + +#ifdef CONFIG_OF +static const struct of_device_id da9211_dt_ids[] = { + { .compatible = "dlg,da9211", .data = &da9211_i2c_id[0] }, + { .compatible = "dlg,da9213", .data = &da9211_i2c_id[1] }, + {}, +}; +#endif + /* * I2C driver interface functions */ @@ -377,6 +446,14 @@
static int da9211_i2c_probe(struct i2c_client *i2c, return -ENODEV; } + if (!chip->pdata) + chip->pdata = da9211_parse_regulators_dt(chip->dev); + + if (IS_ERR(chip->pdata)) { + dev_err(chip->dev, "No regulators defined for the platform\n"); + return PTR_ERR(chip->pdata); + } + chip->chip_irq = i2c->irq; if (chip->chip_irq != 0) { @@ -401,12 +478,6 @@ static int da9211_i2c_probe(struct i2c_client *i2c, return ret; } -static const struct i2c_device_id da9211_i2c_id[] = { - {"da9211", DA9211}, - {"da9213", DA9213}, - {}, -}; - MODULE_DEVICE_TABLE(i2c, da9211_i2c_id); static struct i2c_driver da9211_regulator_driver = { diff --git a/include/linux/regulator/da9211.h b/include/linux/regulator/da9211.h index 658c3c33f4c0..5479394fefce 100644 --- a/include/linux/regulator/da9211.h +++ b/include/linux/regulator/da9211.h @@ -32,6 +32,6 @@ struct da9211_pdata { * 2 : 2 phase 2 buck */ int num_buck; - struct regulator_init_data *init_data; + struct regulator_init_data *init_data[DA9211_MAX_REGULATORS]; }; #endif -- cgit v1.2.3 From 6a4c264313c4ae32dc53821a9c57e0dc9696fb81 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 27 Aug 2014 06:21:23 +0930 Subject: module: rename KERNEL_PARAM_FL_NOARG to avoid confusion Make it clear this is about kernel_param_ops, not kernel_param (which will soon have a flags field of its own). No functional changes. Cc: Rusty Russell Cc: Jean Delvare Cc: Andrew Morton Cc: Li Zhong Cc: Jon Mason Cc: Daniel Vetter Signed-off-by: Jani Nikula Signed-off-by: Rusty Russell --- include/linux/moduleparam.h | 2 +- kernel/module.c | 2 +- kernel/params.c | 6 +++--- security/apparmor/lsm.c | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 494f99e852da..16fdddab856a 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -42,7 +42,7 @@ struct kernel_param; * NOARG - the parameter allows for no argument (foo instead of foo=1) */ enum { - KERNEL_PARAM_FL_NOARG = (1 << 0) + KERNEL_PARAM_OPS_FL_NOARG = (1 << 0) }; struct kernel_param_ops { diff --git a/kernel/module.c b/kernel/module.c index 03214bd288e9..8a0dc91eddbc 100644 --- a/kernel/module.c +++ b/kernel/module.c @@ -135,7 +135,7 @@ static int param_set_bool_enable_only(const char *val, } static const struct kernel_param_ops param_ops_bool_enable_only = { - .flags = KERNEL_PARAM_FL_NOARG, + .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = param_set_bool_enable_only, .get = param_get_bool, }; diff --git a/kernel/params.c b/kernel/params.c index 34f527023794..8a484fc8bde8 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -104,7 +104,7 @@ static int parse_one(char *param, return 0; /* No one handled NULL, so do it here. 
*/ if (!val && - !(params[i].ops->flags & KERNEL_PARAM_FL_NOARG)) + !(params[i].ops->flags & KERNEL_PARAM_OPS_FL_NOARG)) return -EINVAL; pr_debug("handling %s with %p\n", param, params[i].ops->set); @@ -318,7 +318,7 @@ int param_get_bool(char *buffer, const struct kernel_param *kp) EXPORT_SYMBOL(param_get_bool); struct kernel_param_ops param_ops_bool = { - .flags = KERNEL_PARAM_FL_NOARG, + .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = param_set_bool, .get = param_get_bool, }; @@ -369,7 +369,7 @@ int param_set_bint(const char *val, const struct kernel_param *kp) EXPORT_SYMBOL(param_set_bint); struct kernel_param_ops param_ops_bint = { - .flags = KERNEL_PARAM_FL_NOARG, + .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = param_set_bint, .get = param_get_int, }; diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index 998100093332..65ca451a764d 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -668,7 +668,7 @@ static int param_set_aabool(const char *val, const struct kernel_param *kp); static int param_get_aabool(char *buffer, const struct kernel_param *kp); #define param_check_aabool param_check_bool static struct kernel_param_ops param_ops_aabool = { - .flags = KERNEL_PARAM_FL_NOARG, + .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = param_set_aabool, .get = param_get_aabool }; @@ -685,7 +685,7 @@ static int param_set_aalockpolicy(const char *val, const struct kernel_param *kp static int param_get_aalockpolicy(char *buffer, const struct kernel_param *kp); #define param_check_aalockpolicy param_check_bool static struct kernel_param_ops param_ops_aalockpolicy = { - .flags = KERNEL_PARAM_FL_NOARG, + .flags = KERNEL_PARAM_OPS_FL_NOARG, .set = param_set_aalockpolicy, .get = param_get_aalockpolicy }; -- cgit v1.2.3 From 91f9d330cc14932084c37751997213cb0e7ea882 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 27 Aug 2014 06:22:23 +0930 Subject: module: make it possible to have unsafe, tainting module params Add a flags field to struct kernel_param, and add the first flag: unsafe parameter. Modifying a kernel parameter with the unsafe flag set, either via the kernel command line or sysfs, will issue a warning and taint the kernel.
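As a hedged illustration of the intended use (the module_param_unsafe() helper shown here is only introduced by a later patch in this series, and the parameter name and description are made up):

	#include <linux/moduleparam.h>

	/* Debug-only override; setting it taints the kernel with TAINT_USER. */
	static bool force_unsupported;
	module_param_unsafe(force_unsupported, bool, 0644);
	MODULE_PARM_DESC(force_unsupported,
			 "Force-enable unsupported hardware paths (taints kernel)");

Booting with force_unsupported=1 or writing the sysfs node then logs "Setting dangerous option force_unsupported - tainting kernel" before the value is stored.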
Cc: Rusty Russell Cc: Jean Delvare Cc: Andrew Morton Cc: Li Zhong Cc: Jon Mason Cc: Daniel Vetter Signed-off-by: Jani Nikula Signed-off-by: Rusty Russell --- drivers/tty/serial/8250/8250_core.c | 2 +- include/linux/moduleparam.h | 44 +++++++++++++++++++++++++++++-------- kernel/params.c | 11 ++++++++++ 3 files changed, 47 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 1d42dba6121d..bd672948f2f1 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -3587,7 +3587,7 @@ static void __used s8250_options(void) #ifdef CONFIG_SERIAL_8250_RSA __module_param_call(MODULE_PARAM_PREFIX, probe_rsa, &param_array_ops, .arr = &__param_arr_probe_rsa, - 0444, -1); + 0444, -1, 0); #endif } #else diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 16fdddab856a..1e3ffb839daa 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -56,11 +56,21 @@ struct kernel_param_ops { void (*free)(void *arg); }; +/* + * Flags available for kernel_param + * + * UNSAFE - the parameter is dangerous and setting it will taint the kernel + */ +enum { + KERNEL_PARAM_FL_UNSAFE = (1 << 0) +}; + struct kernel_param { const char *name; const struct kernel_param_ops *ops; u16 perm; - s16 level; + s8 level; + u8 flags; union { void *arg; const struct kparam_string *str; @@ -137,7 +147,7 @@ struct kparam_array * The ops can have NULL set or get functions. */ #define module_param_cb(name, ops, arg, perm) \ - __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, -1) + __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, -1, 0) /** * _param_cb - general callback for a module/cmdline parameter @@ -149,7 +159,7 @@ struct kparam_array * The ops can have NULL set or get functions. */ #define __level_param_cb(name, ops, arg, perm, level) \ - __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, level) + __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, level, 0) #define core_param_cb(name, ops, arg, perm) \ __level_param_cb(name, ops, arg, perm, 1) @@ -184,14 +194,14 @@ struct kparam_array /* This is the fundamental function for registering boot/module parameters. */ -#define __module_param_call(prefix, name, ops, arg, perm, level) \ +#define __module_param_call(prefix, name, ops, arg, perm, level, flags) \ /* Default value instead of permissions? */ \ static const char __param_str_##name[] = prefix #name; \ static struct kernel_param __moduleparam_const __param_##name \ __used \ __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \ = { __param_str_##name, ops, VERIFY_OCTAL_PERMISSIONS(perm), \ - level, { arg } } + level, flags, { arg } } /* Obsolete - use module_param_cb() */ #define module_param_call(name, set, get, arg, perm) \ { 0, (void *)set, (void *)get }; \ __module_param_call(MODULE_PARAM_PREFIX, \ name, &__param_ops_##name, arg, \ - (perm) + sizeof(__check_old_set_param(set))*0, -1) + (perm) + sizeof(__check_old_set_param(set))*0, -1, 0) /* We don't get oldget: it's often a new-style param_get_uint, etc.
*/ static inline int @@ -279,7 +289,7 @@ static inline void __kernel_param_unlock(void) */ #define core_param(name, var, type, perm) \ param_check_##type(name, &(var)); \ - __module_param_call("", name, &param_ops_##type, &var, perm, -1) + __module_param_call("", name, &param_ops_##type, &var, perm, -1, 0) #endif /* !MODULE */ /** @@ -297,7 +307,7 @@ static inline void __kernel_param_unlock(void) = { len, string }; \ __module_param_call(MODULE_PARAM_PREFIX, name, \ &param_ops_string, \ - .str = &__param_string_##name, perm, -1); \ + .str = &__param_string_##name, perm, -1, 0);\ __MODULE_PARM_TYPE(name, "string") /** @@ -346,6 +356,22 @@ static inline void destroy_params(const struct kernel_param *params, #define __param_check(name, p, type) \ static inline type __always_unused *__check_##name(void) { return(p); } +/** + * param_check_unsafe - Warn and taint the kernel if setting dangerous options. + * + * This gets called from all the standard param setters, but can be used from + * custom setters as well. + */ +static inline void +param_check_unsafe(const struct kernel_param *kp) +{ + if (kp->flags & KERNEL_PARAM_FL_UNSAFE) { + pr_warn("Setting dangerous option %s - tainting kernel\n", + kp->name); + add_taint(TAINT_USER, LOCKDEP_STILL_OK); + } +} + extern struct kernel_param_ops param_ops_byte; extern int param_set_byte(const char *val, const struct kernel_param *kp); extern int param_get_byte(char *buffer, const struct kernel_param *kp); @@ -444,7 +470,7 @@ extern int param_set_bint(const char *val, const struct kernel_param *kp); __module_param_call(MODULE_PARAM_PREFIX, name, \ &param_array_ops, \ .arr = &__param_arr_##name, \ - perm, -1); \ + perm, -1, 0); \ __MODULE_PARM_TYPE(name, "array of " #type) extern struct kernel_param_ops param_array_ops; diff --git a/kernel/params.c b/kernel/params.c index 8a484fc8bde8..ad8d04563c3a 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -233,6 +233,7 @@ char *parse_args(const char *doing, #define STANDARD_PARAM_DEF(name, type, format, strtolfn) \ int param_set_##name(const char *val, const struct kernel_param *kp) \ { \ + param_check_unsafe(kp); \ return strtolfn(val, 0, (type *)kp->arg); \ } \ int param_get_##name(char *buffer, const struct kernel_param *kp) \ @@ -265,6 +266,8 @@ int param_set_charp(const char *val, const struct kernel_param *kp) return -ENOSPC; } + param_check_unsafe(kp); + maybe_kfree_parameter(*(char **)kp->arg); /* This is a hack. We can't kmalloc in early boot, and we @@ -302,6 +305,8 @@ EXPORT_SYMBOL(param_ops_charp); /* Actually could be a bool or an int, for historical reasons. */ int param_set_bool(const char *val, const struct kernel_param *kp) { + param_check_unsafe(kp); + /* No equals means "set"... */ if (!val) val = "1"; @@ -331,6 +336,8 @@ int param_set_invbool(const char *val, const struct kernel_param *kp) bool boolval; struct kernel_param dummy; + param_check_unsafe(kp); + dummy.arg = &boolval; ret = param_set_bool(val, &dummy); if (ret == 0) @@ -357,6 +364,8 @@ int param_set_bint(const char *val, const struct kernel_param *kp) bool v; int ret; + param_check_unsafe(kp); + /* Match bool exactly, by re-using it.
*/ boolkp = *kp; boolkp.arg = &v; @@ -476,6 +485,8 @@ int param_set_copystring(const char *val, const struct kernel_param *kp) { const struct kparam_string *kps = kp->str; + param_check_unsafe(kp); + if (strlen(val)+1 > kps->maxlen) { pr_err("%s: string doesn't fit in %u chars.\n", kp->name, kps->maxlen-1); -- cgit v1.2.3 From 3baee201b06cfaff84c2c5ddc551b192bb3eaed3 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 27 Aug 2014 06:23:23 +0930 Subject: module: add module_param_unsafe and module_param_named_unsafe Add the helpers to be used by modules wishing to expose unsafe debugging or testing module parameters that taint the kernel when set. Cc: Rusty Russell Cc: Jean Delvare Cc: Andrew Morton Cc: Li Zhong Cc: Jon Mason Cc: Daniel Vetter Signed-off-by: Jani Nikula Signed-off-by: Rusty Russell --- include/linux/moduleparam.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 1e3ffb839daa..9531f9f9729e 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -122,6 +122,12 @@ struct kparam_array #define module_param(name, type, perm) \ module_param_named(name, name, type, perm) +/** + * module_param_unsafe - same as module_param but taints kernel + */ +#define module_param_unsafe(name, type, perm) \ + module_param_named_unsafe(name, name, type, perm) + /** * module_param_named - typesafe helper for a renamed module/cmdline parameter * @name: a valid C identifier which is the parameter name. @@ -138,6 +144,14 @@ struct kparam_array module_param_cb(name, &param_ops_##type, &value, perm); \ __MODULE_PARM_TYPE(name, #type) +/** + * module_param_named_unsafe - same as module_param_named but taints kernel + */ +#define module_param_named_unsafe(name, value, type, perm) \ + param_check_##type(name, &(value)); \ + module_param_cb_unsafe(name, &param_ops_##type, &value, perm); \ + __MODULE_PARM_TYPE(name, #type) + /** * module_param_cb - general callback for a module/cmdline parameter * @name: a valid C identifier which is the parameter name. @@ -149,6 +163,10 @@ struct kparam_array #define module_param_cb(name, ops, arg, perm) \ __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, -1, 0) +#define module_param_cb_unsafe(name, ops, arg, perm) \ + __module_param_call(MODULE_PARAM_PREFIX, name, ops, arg, perm, -1, \ + KERNEL_PARAM_FL_UNSAFE) + /** * _param_cb - general callback for a module/cmdline parameter * to be evaluated before certain initcall level -- cgit v1.2.3 From 7a486d3781295b5298cbf9556928a76d26896863 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Wed, 27 Aug 2014 06:25:23 +0930 Subject: param: check for tainting before calling set op. This means the check no longer has to be called from every set op, and it can move into params.c. Signed-off-by: Rusty Russell --- include/linux/moduleparam.h | 16 ---------------- kernel/params.c | 22 +++++++++++----------- 2 files changed, 11 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 9531f9f9729e..593501996574 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -374,22 +374,6 @@ static inline void destroy_params(const struct kernel_param *params, #define __param_check(name, p, type) \ static inline type __always_unused *__check_##name(void) { return(p); } -/** - * param_check_unsafe - Warn and taint the kernel if setting dangerous options.
- * - * This gets called from all the standard param setters, but can be used from - * custom setters as well. - */ -static inline void -param_check_unsafe(const struct kernel_param *kp) -{ - if (kp->flags & KERNEL_PARAM_FL_UNSAFE) { - pr_warn("Setting dangerous option %s - tainting kernel\n", - kp->name); - add_taint(TAINT_USER, LOCKDEP_STILL_OK); - } -} - extern struct kernel_param_ops param_ops_byte; extern int param_set_byte(const char *val, const struct kernel_param *kp); extern int param_get_byte(char *buffer, const struct kernel_param *kp); diff --git a/kernel/params.c b/kernel/params.c index ad8d04563c3a..041b5899d5e2 100644 --- a/kernel/params.c +++ b/kernel/params.c @@ -83,6 +83,15 @@ bool parameq(const char *a, const char *b) return parameqn(a, b, strlen(a)+1); } +static void param_check_unsafe(const struct kernel_param *kp) +{ + if (kp->flags & KERNEL_PARAM_FL_UNSAFE) { + pr_warn("Setting dangerous option %s - tainting kernel\n", + kp->name); + add_taint(TAINT_USER, LOCKDEP_STILL_OK); + } +} + static int parse_one(char *param, char *val, const char *doing, @@ -109,6 +118,7 @@ static int parse_one(char *param, pr_debug("handling %s with %p\n", param, params[i].ops->set); mutex_lock(&param_lock); + param_check_unsafe(&params[i]); err = params[i].ops->set(val, &params[i]); mutex_unlock(&param_lock); return err; @@ -233,7 +243,6 @@ char *parse_args(const char *doing, #define STANDARD_PARAM_DEF(name, type, format, strtolfn) \ int param_set_##name(const char *val, const struct kernel_param *kp) \ { \ - param_check_unsafe(kp); \ return strtolfn(val, 0, (type *)kp->arg); \ } \ int param_get_##name(char *buffer, const struct kernel_param *kp) \ @@ -266,8 +275,6 @@ int param_set_charp(const char *val, const struct kernel_param *kp) return -ENOSPC; } - param_check_unsafe(kp); - maybe_kfree_parameter(*(char **)kp->arg); /* This is a hack. We can't kmalloc in early boot, and we @@ -305,8 +312,6 @@ EXPORT_SYMBOL(param_ops_charp); /* Actually could be a bool or an int, for historical reasons. */ int param_set_bool(const char *val, const struct kernel_param *kp) { - param_check_unsafe(kp); - /* No equals means "set"... */ if (!val) val = "1"; @@ -336,8 +341,6 @@ int param_set_invbool(const char *val, const struct kernel_param *kp) bool boolval; struct kernel_param dummy; - param_check_unsafe(kp); - dummy.arg = &boolval; ret = param_set_bool(val, &dummy); if (ret == 0) @@ -364,8 +367,6 @@ int param_set_bint(const char *val, const struct kernel_param *kp) bool v; int ret; - param_check_unsafe(kp); - /* Match bool exactly, by re-using it.
*/ boolkp = *kp; boolkp.arg = &v; @@ -485,8 +486,6 @@ int param_set_copystring(const char *val, const struct kernel_param *kp) { const struct kparam_string *kps = kp->str; - param_check_unsafe(kp); - if (strlen(val)+1 > kps->maxlen) { pr_err("%s: string doesn't fit in %u chars.\n", kp->name, kps->maxlen-1); @@ -563,6 +562,7 @@ static ssize_t param_attr_store(struct module_attribute *mattr, return -EPERM; mutex_lock(&param_lock); + param_check_unsafe(attribute->param); err = attribute->param->ops->set(buf, attribute->param); mutex_unlock(&param_lock); if (!err) -- cgit v1.2.3 From 64d831269ccbca1fc6d739a0f3c8aa24afb43a5e Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 19 Aug 2014 12:15:00 +0200 Subject: KVM: Introduce gfn_to_hva_memslot_prot To support read-only memory regions on arm and arm64, we need to resolve a gfn to an hva given a pointer to a memslot. To avoid looping through the memslots twice and to reuse the hva error checking of gfn_to_hva_prot(), add a new gfn_to_hva_memslot_prot() function and refactor gfn_to_hva_prot() to use this function. Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 2 ++ virt/kvm/kvm_main.c | 11 +++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ebd723676633..6d8a658ec174 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -528,6 +528,8 @@ struct page *gfn_to_page(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva(struct kvm *kvm, gfn_t gfn); unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable); unsigned long gfn_to_hva_memslot(struct kvm_memory_slot *slot, gfn_t gfn); +unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, gfn_t gfn, + bool *writable); void kvm_release_page_clean(struct page *page); void kvm_release_page_dirty(struct page *page); void kvm_set_page_accessed(struct page *page); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5a0817ee996e..76c92a7249c4 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1076,9 +1076,9 @@ EXPORT_SYMBOL_GPL(gfn_to_hva); * If writable is set to false, the hva returned by this function is only * allowed to be read. */ -unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) +unsigned long gfn_to_hva_memslot_prot(struct kvm_memory_slot *slot, + gfn_t gfn, bool *writable) { - struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); unsigned long hva = __gfn_to_hva_many(slot, gfn, NULL, false); if (!kvm_is_error_hva(hva) && writable) @@ -1087,6 +1087,13 @@ unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) return hva; } +unsigned long gfn_to_hva_prot(struct kvm *kvm, gfn_t gfn, bool *writable) +{ + struct kvm_memory_slot *slot = gfn_to_memslot(kvm, gfn); + + return gfn_to_hva_memslot_prot(slot, gfn, writable); +} + static int kvm_read_hva(void *data, void __user *hva, int len) { return __copy_from_user(data, hva, len); -- cgit v1.2.3 From 5c21403d74af2c9cd635a34c2f9199681a5b813e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 27 Aug 2014 14:39:04 -0700 Subject: net: Update sk_buff flag bit availability comment. We lost one when xmit_more was added. Signed-off-by: David S.
Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9b3802a197a8..b69b7b512c06 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -580,7 +580,7 @@ struct sk_buff { __u8 encap_hdr_csum:1; __u8 csum_valid:1; __u8 csum_complete_sw:1; - /* 2/4 bit hole (depending on ndisc_nodetype presence) */ + /* 1/3 bit hole (depending on ndisc_nodetype presence) */ kmemcheck_bitfield_end(flags2); #if defined CONFIG_NET_DMA || defined CONFIG_NET_RX_BUSY_POLL -- cgit v1.2.3 From 3e8a72d1dae374cf6fc1dba97cec663585845ff9 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 27 Aug 2014 17:04:46 -0700 Subject: net: dsa: reduce number of protocol hooks DSA is currently registering one packet_type function per EtherType it needs to intercept in the receive path of a DSA-enabled Ethernet device. Right now we have three of them: trailer, DSA and eDSA, and there might be more in the future; this will not scale to the addition of new protocols. This patch proceeds with adding a new layer of abstraction and two new functions: dsa_switch_rcv(), which will dispatch into the tag-protocol-specific receive function implemented by net/dsa/tag_*.c, and dsa_slave_xmit(), which will dispatch into the tag-protocol-specific transmit function implemented by net/dsa/tag_*.c. When we do create the per-port slave network devices, we iterate over the switch protocol to assign the DSA-specific receive and transmit operations. A new fake ethertype value is used: ETH_P_XDSA, to illustrate the fact that this is no longer going to look like ETH_P_DSA or ETH_P_TRAILER like it used to be. This allows us to greatly simplify the check in eth_type_trans() and always override the skb->protocol with ETH_P_XDSA for Ethernet switches' tagged protocols, while also reducing the number of repetitive slave netdevice_ops assignments. Signed-off-by: Florian Fainelli Signed-off-by: David S.
Miller --- include/linux/netdevice.h | 28 ++++++++++++--------------- include/net/dsa.h | 20 +++---------------- include/uapi/linux/if_ether.h | 1 + net/dsa/dsa.c | 39 ++++++++++++++++++++----------------- net/dsa/dsa_priv.h | 9 +++------ net/dsa/slave.c | 45 ++++++++++++++----------------------------- net/dsa/tag_dsa.c | 8 ++++---- net/dsa/tag_edsa.c | 8 ++++---- net/dsa/tag_trailer.c | 8 ++++---- net/ethernet/eth.c | 7 ++----- 10 files changed, 68 insertions(+), 105 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 039b23786c22..1875dc71422a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1781,24 +1781,13 @@ void dev_net_set(struct net_device *dev, struct net *net) #endif } -static inline bool netdev_uses_dsa_tags(struct net_device *dev) +static inline bool netdev_uses_dsa(struct net_device *dev) { -#ifdef CONFIG_NET_DSA_TAG_DSA - if (dev->dsa_ptr != NULL) - return dsa_uses_dsa_tags(dev->dsa_ptr); -#endif - - return 0; -} - -static inline bool netdev_uses_trailer_tags(struct net_device *dev) -{ -#ifdef CONFIG_NET_DSA_TAG_TRAILER - if (dev->dsa_ptr != NULL) - return dsa_uses_trailer_tags(dev->dsa_ptr); +#ifdef CONFIG_NET_DSA + return dev->dsa_ptr != NULL; +#else + return false; #endif - - return 0; } /** @@ -1933,6 +1922,13 @@ struct udp_offload { struct offload_callbacks callbacks; }; +struct dsa_device_ops { + netdev_tx_t (*xmit)(struct sk_buff *skb, struct net_device *dev); + int (*rcv)(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev); +}; + + /* often modified stats are per cpu, other are shared (netdev->stats) */ struct pcpu_sw_netstats { u64 rx_packets; diff --git a/include/net/dsa.h b/include/net/dsa.h index 6efce384451e..6e26f1e4d8ce 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -59,6 +59,8 @@ struct dsa_platform_data { struct dsa_chip_data *chip; }; +struct dsa_device_ops; + struct dsa_switch_tree { /* * Configuration data for the platform device that owns @@ -71,6 +73,7 @@ struct dsa_switch_tree { * protocol to use. */ struct net_device *master_netdev; + const struct dsa_device_ops *ops; __be16 tag_protocol; /* @@ -186,21 +189,4 @@ static inline void *ds_to_priv(struct dsa_switch *ds) return (void *)(ds + 1); } -/* - * The original DSA tag format and some other tag formats have no - * ethertype, which means that we need to add a little hack to the - * networking receive path to make sure that received frames get - * the right ->protocol assigned to them when one of those tag - * formats is in use. - */ -static inline bool dsa_uses_dsa_tags(struct dsa_switch_tree *dst) -{ - return !!(dst->tag_protocol == htons(ETH_P_DSA)); -} - -static inline bool dsa_uses_trailer_tags(struct dsa_switch_tree *dst) -{ - return !!(dst->tag_protocol == htons(ETH_P_TRAILER)); -} - #endif diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h index 0f8210b8e0bc..aa63ed023c2b 100644 --- a/include/uapi/linux/if_ether.h +++ b/include/uapi/linux/if_ether.h @@ -128,6 +128,7 @@ #define ETH_P_PHONET 0x00F5 /* Nokia Phonet frames */ #define ETH_P_IEEE802154 0x00F6 /* IEEE802.15.4 frame */ #define ETH_P_CAIF 0x00F7 /* ST-Ericsson CAIF protocol */ +#define ETH_P_XDSA 0x00F8 /* Multiplexed DSA protocol */ /* * This is an Ethernet frame header. 
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 0a49632fac47..92e71d2a2ccd 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -608,6 +608,24 @@ static void dsa_shutdown(struct platform_device *pdev) { } +static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev, + struct packet_type *pt, struct net_device *orig_dev) +{ + struct dsa_switch_tree *dst = dev->dsa_ptr; + + if (unlikely(dst == NULL)) { + kfree_skb(skb); + return 0; + } + + return dst->ops->rcv(skb, dev, pt, orig_dev); +} + +struct packet_type dsa_pack_type __read_mostly = { + .type = cpu_to_be16(ETH_P_XDSA), + .func = dsa_switch_rcv, +}; + static const struct of_device_id dsa_of_match_table[] = { { .compatible = "marvell,dsa", }, {} @@ -633,30 +651,15 @@ static int __init dsa_init_module(void) if (rc) return rc; -#ifdef CONFIG_NET_DSA_TAG_DSA - dev_add_pack(&dsa_packet_type); -#endif -#ifdef CONFIG_NET_DSA_TAG_EDSA - dev_add_pack(&edsa_packet_type); -#endif -#ifdef CONFIG_NET_DSA_TAG_TRAILER - dev_add_pack(&trailer_packet_type); -#endif + dev_add_pack(&dsa_pack_type); + return 0; } module_init(dsa_init_module); static void __exit dsa_cleanup_module(void) { -#ifdef CONFIG_NET_DSA_TAG_TRAILER - dev_remove_pack(&trailer_packet_type); -#endif -#ifdef CONFIG_NET_DSA_TAG_EDSA - dev_remove_pack(&edsa_packet_type); -#endif -#ifdef CONFIG_NET_DSA_TAG_DSA - dev_remove_pack(&dsa_packet_type); -#endif + dev_remove_pack(&dsa_pack_type); platform_driver_unregister(&dsa_driver); } module_exit(dsa_cleanup_module); diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h index d4cf5cc747e3..218d75d16f6f 100644 --- a/net/dsa/dsa_priv.h +++ b/net/dsa/dsa_priv.h @@ -45,16 +45,13 @@ struct net_device *dsa_slave_create(struct dsa_switch *ds, int port, char *name); /* tag_dsa.c */ -netdev_tx_t dsa_xmit(struct sk_buff *skb, struct net_device *dev); -extern struct packet_type dsa_packet_type; +extern const struct dsa_device_ops dsa_netdev_ops; /* tag_edsa.c */ -netdev_tx_t edsa_xmit(struct sk_buff *skb, struct net_device *dev); -extern struct packet_type edsa_packet_type; +extern const struct dsa_device_ops edsa_netdev_ops; /* tag_trailer.c */ -netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev); -extern struct packet_type trailer_packet_type; +extern const struct dsa_device_ops trailer_netdev_ops; #endif diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 45a1e34c89e0..ad1a913533aa 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -171,6 +171,14 @@ static int dsa_slave_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) return -EOPNOTSUPP; } +static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + struct dsa_switch_tree *dst = p->parent->dst; + + return dst->ops->xmit(skb, dev); +} + /* ethtool operations *******************************************************/ static int @@ -293,42 +301,16 @@ static const struct ethtool_ops dsa_slave_ethtool_ops = { .get_sset_count = dsa_slave_get_sset_count, }; -#ifdef CONFIG_NET_DSA_TAG_DSA -static const struct net_device_ops dsa_netdev_ops = { +static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_init = dsa_slave_init, .ndo_open = dsa_slave_open, .ndo_stop = dsa_slave_close, - .ndo_start_xmit = dsa_xmit, + .ndo_start_xmit = dsa_slave_xmit, .ndo_change_rx_flags = dsa_slave_change_rx_flags, .ndo_set_rx_mode = dsa_slave_set_rx_mode, .ndo_set_mac_address = dsa_slave_set_mac_address, .ndo_do_ioctl = dsa_slave_ioctl, }; -#endif -#ifdef CONFIG_NET_DSA_TAG_EDSA -static const struct net_device_ops 
edsa_netdev_ops = { - .ndo_init = dsa_slave_init, - .ndo_open = dsa_slave_open, - .ndo_stop = dsa_slave_close, - .ndo_start_xmit = edsa_xmit, - .ndo_change_rx_flags = dsa_slave_change_rx_flags, - .ndo_set_rx_mode = dsa_slave_set_rx_mode, - .ndo_set_mac_address = dsa_slave_set_mac_address, - .ndo_do_ioctl = dsa_slave_ioctl, -}; -#endif -#ifdef CONFIG_NET_DSA_TAG_TRAILER -static const struct net_device_ops trailer_netdev_ops = { - .ndo_init = dsa_slave_init, - .ndo_open = dsa_slave_open, - .ndo_stop = dsa_slave_close, - .ndo_start_xmit = trailer_xmit, - .ndo_change_rx_flags = dsa_slave_change_rx_flags, - .ndo_set_rx_mode = dsa_slave_set_rx_mode, - .ndo_set_mac_address = dsa_slave_set_mac_address, - .ndo_do_ioctl = dsa_slave_ioctl, -}; -#endif /* slave device setup *******************************************************/ struct net_device * @@ -349,21 +331,22 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, slave_dev->ethtool_ops = &dsa_slave_ethtool_ops; eth_hw_addr_inherit(slave_dev, master); slave_dev->tx_queue_len = 0; + slave_dev->netdev_ops = &dsa_slave_netdev_ops; switch (ds->dst->tag_protocol) { #ifdef CONFIG_NET_DSA_TAG_DSA case htons(ETH_P_DSA): - slave_dev->netdev_ops = &dsa_netdev_ops; + ds->dst->ops = &dsa_netdev_ops; break; #endif #ifdef CONFIG_NET_DSA_TAG_EDSA case htons(ETH_P_EDSA): - slave_dev->netdev_ops = &edsa_netdev_ops; + ds->dst->ops = &edsa_netdev_ops; break; #endif #ifdef CONFIG_NET_DSA_TAG_TRAILER case htons(ETH_P_TRAILER): - slave_dev->netdev_ops = &trailer_netdev_ops; + ds->dst->ops = &trailer_netdev_ops; break; #endif default: diff --git a/net/dsa/tag_dsa.c b/net/dsa/tag_dsa.c index cacce1e22f9c..d7dbc5bda5c0 100644 --- a/net/dsa/tag_dsa.c +++ b/net/dsa/tag_dsa.c @@ -16,7 +16,7 @@ #define DSA_HLEN 4 -netdev_tx_t dsa_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t dsa_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); u8 *dsa_header; @@ -186,7 +186,7 @@ out: return 0; } -struct packet_type dsa_packet_type __read_mostly = { - .type = cpu_to_be16(ETH_P_DSA), - .func = dsa_rcv, +const struct dsa_device_ops dsa_netdev_ops = { + .xmit = dsa_xmit, + .rcv = dsa_rcv, }; diff --git a/net/dsa/tag_edsa.c b/net/dsa/tag_edsa.c index e70c43c25e64..6b30abe89183 100644 --- a/net/dsa/tag_edsa.c +++ b/net/dsa/tag_edsa.c @@ -17,7 +17,7 @@ #define DSA_HLEN 4 #define EDSA_HLEN 8 -netdev_tx_t edsa_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t edsa_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); u8 *edsa_header; @@ -205,7 +205,7 @@ out: return 0; } -struct packet_type edsa_packet_type __read_mostly = { - .type = cpu_to_be16(ETH_P_EDSA), - .func = edsa_rcv, +const struct dsa_device_ops edsa_netdev_ops = { + .xmit = edsa_xmit, + .rcv = edsa_rcv, }; diff --git a/net/dsa/tag_trailer.c b/net/dsa/tag_trailer.c index 94bc260d015d..5fe9444842c5 100644 --- a/net/dsa/tag_trailer.c +++ b/net/dsa/tag_trailer.c @@ -14,7 +14,7 @@ #include #include "dsa_priv.h" -netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev) +static netdev_tx_t trailer_xmit(struct sk_buff *skb, struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); struct sk_buff *nskb; @@ -114,7 +114,7 @@ out: return 0; } -struct packet_type trailer_packet_type __read_mostly = { - .type = cpu_to_be16(ETH_P_TRAILER), - .func = trailer_rcv, +const struct dsa_device_ops trailer_netdev_ops = { + .xmit = trailer_xmit, + .rcv = trailer_rcv, }; diff --git 
a/net/ethernet/eth.c b/net/ethernet/eth.c index f405e0592407..5cebca134585 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -181,11 +181,8 @@ __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev) * variants has been configured on the receiving interface, * and if so, set skb->protocol without looking at the packet. */ - if (unlikely(netdev_uses_dsa_tags(dev))) - return htons(ETH_P_DSA); - - if (unlikely(netdev_uses_trailer_tags(dev))) - return htons(ETH_P_TRAILER); + if (unlikely(netdev_uses_dsa(dev))) + return htons(ETH_P_XDSA); if (likely(ntohs(eth->h_proto) >= ETH_P_802_3_MIN)) return eth->h_proto; -- cgit v1.2.3 From 464c3668f065baeacfffa9d421959d21069389fe Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 27 Aug 2014 17:04:48 -0700 Subject: net: phy: provide stub for fixed_phy_set_link_update In preparation for updating the DSA code and to avoid using ifdefs there, provide an empty stub for fixed_phy_set_link_update when CONFIG_FIXED_PHY is not selected. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/phy_fixed.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy_fixed.h b/include/linux/phy_fixed.h index ae612acebb53..941138664c1d 100644 --- a/include/linux/phy_fixed.h +++ b/include/linux/phy_fixed.h @@ -18,6 +18,9 @@ extern int fixed_phy_register(unsigned int irq, struct fixed_phy_status *status, struct device_node *np); extern void fixed_phy_del(int phy_addr); +extern int fixed_phy_set_link_update(struct phy_device *phydev, + int (*link_update)(struct net_device *, + struct fixed_phy_status *)); #else static inline int fixed_phy_add(unsigned int irq, int phy_id, struct fixed_phy_status *status) @@ -34,14 +37,12 @@ static inline int fixed_phy_del(int phy_addr) { return -ENODEV; } -#endif /* CONFIG_FIXED_PHY */ - -/* - * This function issued only by fixed_phy-aware drivers, no need - * protect it with #ifdef - */ -extern int fixed_phy_set_link_update(struct phy_device *phydev, +static inline int fixed_phy_set_link_update(struct phy_device *phydev, int (*link_update)(struct net_device *, - struct fixed_phy_status *)); + struct fixed_phy_status *)) +{ + return -ENODEV; +} +#endif /* CONFIG_FIXED_PHY */ #endif /* __PHY_FIXED_H */ -- cgit v1.2.3 From 5aed85cec29882d1c4b4b2a01cb75a99efdbe4ed Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Wed, 27 Aug 2014 17:04:52 -0700 Subject: net: dsa: allow switches to work without tagging In case switch port tagging is disabled (voluntarily, or the switch just does not support it), allow us to continue using the defined set of dsa_device_ops in net/dsa/slave.c. We introduce dsa_uses_tagged_protocol() to check whether we need to override skb->protocol and go through the DSA-specific packet_type function, or if we just go on and receive the SKB through the normal path. Signed-off-by: Florian Fainelli Signed-off-by: David S.
Miller --- include/linux/netdevice.h | 6 +++--- include/net/dsa.h | 5 +++++ net/dsa/slave.c | 19 ++++++++++++++++++- 3 files changed, 26 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 1875dc71422a..429801370d0c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1784,10 +1784,10 @@ void dev_net_set(struct net_device *dev, struct net *net) static inline bool netdev_uses_dsa(struct net_device *dev) { #ifdef CONFIG_NET_DSA - return dev->dsa_ptr != NULL; -#else - return false; + if (dev->dsa_ptr != NULL) + return dsa_uses_tagged_protocol(dev->dsa_ptr); #endif + return false; } /** diff --git a/include/net/dsa.h b/include/net/dsa.h index dc357454ae3b..1035f6452d79 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -198,4 +198,9 @@ static inline void *ds_to_priv(struct dsa_switch *ds) return (void *)(ds + 1); } +static inline bool dsa_uses_tagged_protocol(struct dsa_switch_tree *dst) +{ + return dst->tag_protocol != 0; +} + #endif diff --git a/net/dsa/slave.c b/net/dsa/slave.c index 03d2894a0f8a..241c2a1684cb 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -181,6 +181,17 @@ static netdev_tx_t dsa_slave_xmit(struct sk_buff *skb, struct net_device *dev) return dst->ops->xmit(skb, dev); } +static netdev_tx_t dsa_slave_notag_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct dsa_slave_priv *p = netdev_priv(dev); + + skb->dev = p->parent->dst->master_netdev; + dev_queue_xmit(skb); + + return NETDEV_TX_OK; +} + /* ethtool operations *******************************************************/ static int @@ -314,6 +325,11 @@ static const struct net_device_ops dsa_slave_netdev_ops = { .ndo_do_ioctl = dsa_slave_ioctl, }; +static const struct dsa_device_ops notag_netdev_ops = { + .xmit = dsa_slave_notag_xmit, + .rcv = NULL, +}; + static void dsa_slave_adjust_link(struct net_device *dev) { struct dsa_slave_priv *p = netdev_priv(dev); @@ -415,7 +431,8 @@ dsa_slave_create(struct dsa_switch *ds, struct device *parent, break; #endif default: - BUG(); + ds->dst->ops = &notag_netdev_ops; + break; } SET_NETDEV_DEV(slave_dev, parent); -- cgit v1.2.3 From 97fdaab4699de3a2a91001efef60bb0622de1c53 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 26 Aug 2014 13:15:25 -0700 Subject: net: phy: broadcom: fix PHY_BCM_OUI_4 PHY_BCM_OUI_4 is missing two significant digits that actually make it an OUI; add those missing digits so it becomes usable again for matching. Fixes: b560a58c45c6 ("net: phy: add Broadcom BCM7xxx internal PHY driver") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index ee1431d976fa..921f17ca4c26 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -21,7 +21,7 @@ #define PHY_BCM_OUI_1 0x00206000 #define PHY_BCM_OUI_2 0x0143bc00 #define PHY_BCM_OUI_3 0x03625c00 -#define PHY_BCM_OUI_4 0x600d0000 +#define PHY_BCM_OUI_4 0x600d8400 #define PHY_BCM_OUI_5 0x03625e00 -- cgit v1.2.3 From 11bf2bbd596add62a86a74fc7aedc0b86c6ec154 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 26 Aug 2014 13:15:26 -0700 Subject: net: phy: broadcom: add new Broadcom OUI Broadcom started to use a new OUI for its 2013 and newer products: D4-01-29, which translates into 0xae025000 as a 32-bit OUI; add its definition. Signed-off-by: Florian Fainelli Signed-off-by: David S.
Miller --- include/linux/brcmphy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 921f17ca4c26..cbcfad36d4c0 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -23,7 +23,7 @@ #define PHY_BCM_OUI_3 0x03625c00 #define PHY_BCM_OUI_4 0x600d8400 #define PHY_BCM_OUI_5 0x03625e00 - +#define PHY_BCM_OUI_6 0xae025000 #define PHY_BCM_FLAGS_MODE_COPPER 0x00000001 #define PHY_BCM_FLAGS_MODE_1000BX 0x00000002 -- cgit v1.2.3 From 430ad68ffb5fa632a277162e5995cd6f7a39fb78 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 26 Aug 2014 13:15:27 -0700 Subject: net: phy: bcm7xxx: add BCM7250 and BCM7364 PHY entries Add two new entries to the Broadcom BCM7xxx internal PHY driver for BCM7250 and BCM7364 chips. Those chips share the usual 28nm process Gigabit PHY sequence and require the same workarounds so far. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/phy/bcm7xxx.c | 4 ++++ include/linux/brcmphy.h | 2 ++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index 948c7086679a..09dd6e1dc6e1 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -335,6 +335,8 @@ static int bcm7xxx_dummy_config_init(struct phy_device *phydev) } static struct phy_driver bcm7xxx_driver[] = { + BCM7XXX_28NM_GPHY(PHY_ID_BCM7250, "Broadcom BCM7250"), + BCM7XXX_28NM_GPHY(PHY_ID_BCM7364, "Broadcom BCM7364"), BCM7XXX_28NM_GPHY(PHY_ID_BCM7366, "Broadcom BCM7366"), BCM7XXX_28NM_GPHY(PHY_ID_BCM7439, "Broadcom BCM7439"), BCM7XXX_28NM_GPHY(PHY_ID_BCM7445, "Broadcom BCM7445"), @@ -367,6 +369,8 @@ static struct phy_driver bcm7xxx_driver[] = { } }; static struct mdio_device_id __maybe_unused bcm7xxx_tbl[] = { + { PHY_ID_BCM7250, 0xfffffff0, }, + { PHY_ID_BCM7364, 0xfffffff0, }, { PHY_ID_BCM7366, 0xfffffff0, }, { PHY_ID_BCM7439, 0xfffffff0, }, { PHY_ID_BCM7445, 0xfffffff0, }, diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index cbcfad36d4c0..5bd35cc0d471 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -13,6 +13,8 @@ #define PHY_ID_BCM5461 0x002060c0 #define PHY_ID_BCM57780 0x03625d90 +#define PHY_ID_BCM7250 0xae025280 +#define PHY_ID_BCM7364 0xae025260 #define PHY_ID_BCM7366 0x600d8490 #define PHY_ID_BCM7439 0x600d8480 #define PHY_ID_BCM7445 0x600d8510 -- cgit v1.2.3 From 4ba2968420fa9d0604b6a6a5c61bfa8d0fa84ae0 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Tue, 26 Aug 2014 19:12:21 -0500 Subject: percpu: Resolve ambiguities in __get_cpu_var/cpumask_var_t __get_cpu_var can paper over differences in the definitions of cpumask_var_t and either use the address of the cpumask variable directly or perform a fetch of the address of the struct cpumask allocated elsewhere. This is particularly important when using per cpu cpumask_var_t declarations because in one case we have an offset into a per cpu area to handle and in the other case we need to fetch a pointer from the offset. This patch introduces a new macro this_cpu_cpumask_var_ptr() that is defined where cpumask_var_t is defined and performs the proper actions. All use cases where __get_cpu_var is used with cpumask_var_t are converted to the use of this_cpu_cpumask_var_ptr().
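For illustration, a minimal sketch (example_mask and example_use are invented names, not part of the patch) of how the new macro is meant to be used with a per-cpu cpumask_var_t under either configuration:

	/*
	 * With CONFIG_CPUMASK_OFFSTACK a per-cpu cpumask_var_t is a per-cpu
	 * pointer that must be fetched with this_cpu_read(); without it, it
	 * is a per-cpu struct whose address is taken with this_cpu_ptr().
	 * this_cpu_cpumask_var_ptr() picks the right operation either way.
	 */
	static DEFINE_PER_CPU(cpumask_var_t, example_mask);

	static void example_use(void)	/* called with preemption disabled */
	{
		struct cpumask *mask = this_cpu_cpumask_var_ptr(example_mask);

		cpumask_clear(mask);
		cpumask_set_cpu(smp_processor_id(), mask);
	}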
Signed-off-by: Christoph Lameter Signed-off-by: Tejun Heo --- arch/x86/include/asm/perf_event_p4.h | 2 +- arch/x86/kernel/apic/x2apic_cluster.c | 3 +-- arch/x86/oprofile/op_model_p4.c | 2 +- include/linux/cpumask.h | 11 +++++++++++ kernel/sched/deadline.c | 2 +- kernel/sched/fair.c | 2 +- kernel/sched/rt.c | 2 +- 7 files changed, 17 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 85e13ccf15c4..d725382c2ae0 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -189,7 +189,7 @@ static inline int p4_ht_thread(int cpu) { #ifdef CONFIG_SMP if (smp_num_siblings == 2) - return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map)); + return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map)); #endif return 0; } diff --git a/arch/x86/kernel/apic/x2apic_cluster.c b/arch/x86/kernel/apic/x2apic_cluster.c index 6ce600f9bc78..1f5d5f2ffae6 100644 --- a/arch/x86/kernel/apic/x2apic_cluster.c +++ b/arch/x86/kernel/apic/x2apic_cluster.c @@ -42,8 +42,7 @@ __x2apic_send_IPI_mask(const struct cpumask *mask, int vector, int apic_dest) * We are to modify mask, so we need an own copy * and be sure it's manipulated with irq off. */ - ipi_mask_ptr = __raw_get_cpu_var(ipi_mask); - cpumask_copy(ipi_mask_ptr, mask); + ipi_mask_ptr = this_cpu_cpumask_var_ptr(ipi_mask); /* * The idea is to send one IPI per cluster. diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index 98ab13058f89..ad1d91f475ab 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -372,7 +372,7 @@ static unsigned int get_stagger(void) { #ifdef CONFIG_SMP int cpu = smp_processor_id(); - return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map)); + return cpu != cpumask_first(this_cpu_cpumask_var_ptr(cpu_sibling_map)); #endif return 0; } diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 2997af6d2ccd..0a9a6da21e74 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -666,10 +666,19 @@ static inline size_t cpumask_size(void) * * This code makes NR_CPUS length memcopy and brings to a memory corruption. * cpumask_copy() provide safe copy functionality. + * + * Note that there is another evil here: If you define a cpumask_var_t + * as a percpu variable then the way to obtain the address of the cpumask + * structure differs between configurations, which influences which + * this_cpu_* operation needs to be used. Please use + * this_cpu_cpumask_var_ptr() in those cases. The direct use of + * this_cpu_ptr() or this_cpu_read() will lead to failures when the + * other type of cpumask_var_t implementation is configured.
*/ #ifdef CONFIG_CPUMASK_OFFSTACK typedef struct cpumask *cpumask_var_t; +#define this_cpu_cpumask_var_ptr(x) this_cpu_read(x) + bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags); bool zalloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node); @@ -681,6 +690,8 @@ void free_bootmem_cpumask_var(cpumask_var_t mask); #else typedef struct cpumask cpumask_var_t[1]; +#define this_cpu_cpumask_var_ptr(x) this_cpu_ptr(x) + static inline bool alloc_cpumask_var(cpumask_var_t *mask, gfp_t flags) { return true; diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c index 255ce138b652..4a608cfaecbd 100644 --- a/kernel/sched/deadline.c +++ b/kernel/sched/deadline.c @@ -1158,7 +1158,7 @@ static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask_dl); static int find_later_rq(struct task_struct *task) { struct sched_domain *sd; - struct cpumask *later_mask = __get_cpu_var(local_cpu_mask_dl); + struct cpumask *later_mask = this_cpu_cpumask_var_ptr(local_cpu_mask_dl); int this_cpu = smp_processor_id(); int best_cpu, cpu = task_cpu(task); diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index bfa3c86d0d68..197d659c144c 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -6539,7 +6539,7 @@ static int load_balance(int this_cpu, struct rq *this_rq, struct sched_group *group; struct rq *busiest; unsigned long flags; - struct cpumask *cpus = __get_cpu_var(load_balance_mask); + struct cpumask *cpus = this_cpu_cpumask_var_ptr(load_balance_mask); struct lb_env env = { .sd = sd, diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index 5f6edca4fafd..a4c50fce9b90 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -1526,7 +1526,7 @@ static DEFINE_PER_CPU(cpumask_var_t, local_cpu_mask); static int find_lowest_rq(struct task_struct *task) { struct sched_domain *sd; - struct cpumask *lowest_mask = __get_cpu_var(local_cpu_mask); + struct cpumask *lowest_mask = this_cpu_cpumask_var_ptr(local_cpu_mask); int this_cpu = smp_processor_id(); int cpu = task_cpu(task); -- cgit v1.2.3 From d7cdb968081727746c8d2fb31b12ea6d1694888e Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 20 Jun 2014 17:19:06 +0200 Subject: treewide: fix synchronize_rcu() in comments Signed-off-by: Jesper Dangaard Brouer Signed-off-by: Jiri Kosina --- include/linux/percpu-refcount.h | 2 +- net/rds/af_rds.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 3dfbf237cd8f..652fd64cab5e 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -29,7 +29,7 @@ * calls io_destroy() or the process exits. * * In the aio code, kill_ioctx() is called when we wish to destroy a kioctx; it - * calls percpu_ref_kill(), then hlist_del_rcu() and sychronize_rcu() to remove + * calls percpu_ref_kill(), then hlist_del_rcu() and synchronize_rcu() to remove * the kioctx from the proccess's list of kioctxs - after that, there can't be * any new users of the kioctx (from lookup_ioctx()) and it's then safe to drop * the initial ref with percpu_ref_put(). 
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c index 424ff622ab5f..10443377fb9d 100644 --- a/net/rds/af_rds.c +++ b/net/rds/af_rds.c @@ -83,7 +83,7 @@ static int rds_release(struct socket *sock) /* * the binding lookup hash uses rcu, we need to - * make sure we sychronize_rcu before we free our + * make sure we synchronize_rcu before we free our * entry */ rds_remove_bound(rs); -- cgit v1.2.3 From d37aba521379203b740a2929e6e6f6bd2485f5d7 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 28 Aug 2014 13:54:18 +0200 Subject: ARM: tegra: remove unused tegra_emc.h The header file include/linux/platform_data/tegra_emc.h does not seem to be used anywhere. It was orphaned by a7cbe92c "ARM: tegra: remove tegra EMC scaling driver". Remove it. Signed-off-by: Rasmus Villemoes Signed-off-by: Stephen Warren --- include/linux/platform_data/tegra_emc.h | 34 --------------------------------- 1 file changed, 34 deletions(-) delete mode 100644 include/linux/platform_data/tegra_emc.h (limited to 'include/linux') diff --git a/include/linux/platform_data/tegra_emc.h b/include/linux/platform_data/tegra_emc.h deleted file mode 100644 index df67505e98f8..000000000000 --- a/include/linux/platform_data/tegra_emc.h +++ /dev/null @@ -1,34 +0,0 @@ -/* - * Copyright (C) 2011 Google, Inc. - * - * Author: - * Colin Cross - * Olof Johansson - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#ifndef __TEGRA_EMC_H_ -#define __TEGRA_EMC_H_ - -#define TEGRA_EMC_NUM_REGS 46 - -struct tegra_emc_table { - unsigned long rate; - u32 regs[TEGRA_EMC_NUM_REGS]; -}; - -struct tegra_emc_pdata { - int num_tables; - struct tegra_emc_table *tables; -}; - -#endif -- cgit v1.2.3 From 2b8941b962a9f24d61c2b3c2e889928e6cf3d82b Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 27 Aug 2014 11:17:56 -0400 Subject: NFSD: Update some as-yet unused 4.2 error codes Recent NFS v4.2 drafts have removed NFS4ERR_METADATA_NOTSUPP and reassigned the error code to NFS4ERR_UNION_NOTSUPP. I also add in the NFS4ERR_OFFLOAD_NO_REQS error code. We're not using any of these yet, so there's no harm done. Signed-off-by: Anna Schumaker Signed-off-by: J. 
Bruce Fields --- fs/nfsd/nfsd.h | 2 +- include/linux/nfs4.h | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/nfsd/nfsd.h b/fs/nfsd/nfsd.h index 847daf37e566..747f3b95bd11 100644 --- a/fs/nfsd/nfsd.h +++ b/fs/nfsd/nfsd.h @@ -251,7 +251,7 @@ void nfsd_lockd_shutdown(void); #define nfserr_deleg_revoked cpu_to_be32(NFS4ERR_DELEG_REVOKED) #define nfserr_partner_notsupp cpu_to_be32(NFS4ERR_PARTNER_NOTSUPP) #define nfserr_partner_no_auth cpu_to_be32(NFS4ERR_PARTNER_NO_AUTH) -#define nfserr_metadata_notsupp cpu_to_be32(NFS4ERR_METADATA_NOTSUPP) +#define nfserr_union_notsupp cpu_to_be32(NFS4ERR_UNION_NOTSUPP) #define nfserr_offload_denied cpu_to_be32(NFS4ERR_OFFLOAD_DENIED) #define nfserr_wrong_lfs cpu_to_be32(NFS4ERR_WRONG_LFS) #define nfserr_badlabel cpu_to_be32(NFS4ERR_BADLABEL) diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index a1e3064a8d99..79b2a0f5c318 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -235,10 +235,11 @@ enum nfsstat4 { /* nfs42 */ NFS4ERR_PARTNER_NOTSUPP = 10088, NFS4ERR_PARTNER_NO_AUTH = 10089, - NFS4ERR_METADATA_NOTSUPP = 10090, + NFS4ERR_UNION_NOTSUPP = 10090, NFS4ERR_OFFLOAD_DENIED = 10091, NFS4ERR_WRONG_LFS = 10092, NFS4ERR_BADLABEL = 10093, + NFS4ERR_OFFLOAD_NO_REQS = 10094, }; static inline bool seqid_mutating_err(u32 err) -- cgit v1.2.3 From abdc08a3a263a20e49534a36291d657bf53dda5b Mon Sep 17 00:00:00 2001 From: Alexandre Courbot Date: Tue, 19 Aug 2014 10:06:09 -0700 Subject: gpio: change gpiochip_request_own_desc() prototype The current prototype of gpiochip_request_own_desc() requires the caller to obtain a pointer to a descriptor. This is in contradiction to all other GPIO request schemes, and imposes an extra step of obtaining a descriptor on drivers. Most drivers actually cannot even perform that step since the function that does it (gpiochip_get_desc()) is gpiolib-private. Change gpiochip_request_own_desc() to return a descriptor from a (chip, hwnum) tuple and update users of this function (currently gpiolib-acpi only). Signed-off-by: Alexandre Courbot Tested-by: Mika Westerberg Signed-off-by: Linus Walleij --- Documentation/gpio/driver.txt | 3 ++- drivers/gpio/gpiolib-acpi.c | 20 +++----------------- drivers/gpio/gpiolib.c | 18 ++++++++++++++---- include/linux/gpio/driver.h | 3 ++- 4 files changed, 21 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/Documentation/gpio/driver.txt b/Documentation/gpio/driver.txt index 18790c237977..23b751a10d7b 100644 --- a/Documentation/gpio/driver.txt +++ b/Documentation/gpio/driver.txt @@ -178,7 +178,8 @@ does not help since it pins the module to the kernel forever (it calls try_module_get()). A GPIO driver can use the following functions instead to request and free descriptors without being pinned to the kernel forever.
- int gpiochip_request_own_desc(struct gpio_desc *desc, const char *label) + struct gpio_desc *gpiochip_request_own_desc(struct gpio_desc *desc, + const char *label) void gpiochip_free_own_desc(struct gpio_desc *desc) diff --git a/drivers/gpio/gpiolib-acpi.c b/drivers/gpio/gpiolib-acpi.c index 84540025aa08..f9103e72e2a4 100644 --- a/drivers/gpio/gpiolib-acpi.c +++ b/drivers/gpio/gpiolib-acpi.c @@ -145,14 +145,8 @@ static acpi_status acpi_gpiochip_request_interrupt(struct acpi_resource *ares, if (!handler) return AE_BAD_PARAMETER; - desc = gpiochip_get_desc(chip, pin); + desc = gpiochip_request_own_desc(chip, pin, "ACPI:Event"); if (IS_ERR(desc)) { - dev_err(chip->dev, "Failed to get GPIO descriptor\n"); - return AE_ERROR; - } - - ret = gpiochip_request_own_desc(desc, "ACPI:Event"); - if (ret) { dev_err(chip->dev, "Failed to request GPIO\n"); return AE_ERROR; } @@ -420,22 +414,14 @@ acpi_gpio_adr_space_handler(u32 function, acpi_physical_address address, } } if (!found) { - int ret; - - desc = gpiochip_get_desc(chip, pin); + desc = gpiochip_request_own_desc(chip, pin, + "ACPI:OpRegion"); if (IS_ERR(desc)) { status = AE_ERROR; mutex_unlock(&achip->conn_lock); goto out; } - ret = gpiochip_request_own_desc(desc, "ACPI:OpRegion"); - if (ret) { - status = AE_ERROR; - mutex_unlock(&achip->conn_lock); - goto out; - } - switch (agpio->io_restriction) { case ACPI_IO_RESTRICT_INPUT: gpiod_direction_input(desc); diff --git a/drivers/gpio/gpiolib.c b/drivers/gpio/gpiolib.c index 15cc0bb65dda..a5831d6a9b91 100644 --- a/drivers/gpio/gpiolib.c +++ b/drivers/gpio/gpiolib.c @@ -895,12 +895,22 @@ EXPORT_SYMBOL_GPL(gpiochip_is_requested); * allows the GPIO chip module to be unloaded as needed (we assume that the * GPIO chip driver handles freeing the GPIOs it has requested). */ -int gpiochip_request_own_desc(struct gpio_desc *desc, const char *label) +struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *chip, u16 hwnum, + const char *label) { - if (!desc || !desc->chip) - return -EINVAL; + struct gpio_desc *desc = gpiochip_get_desc(chip, hwnum); + int err; + + if (IS_ERR(desc)) { + chip_err(chip, "failed to get GPIO descriptor\n"); + return desc; + } + + err = __gpiod_request(desc, label); + if (err < 0) + return ERR_PTR(err); - return __gpiod_request(desc, label); + return desc; } EXPORT_SYMBOL_GPL(gpiochip_request_own_desc); diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index e78a2373e374..a2de58fffd19 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -166,7 +166,8 @@ int gpiochip_irqchip_add(struct gpio_chip *gpiochip, #endif /* CONFIG_GPIO_IRQCHIP */ -int gpiochip_request_own_desc(struct gpio_desc *desc, const char *label); +struct gpio_desc *gpiochip_request_own_desc(struct gpio_chip *chip, u16 hwnum, + const char *label); void gpiochip_free_own_desc(struct gpio_desc *desc); #else /* CONFIG_GPIOLIB */ -- cgit v1.2.3 From 7179569aeb52197fd2a9909ba226c4c9cc0e2e2a Mon Sep 17 00:00:00 2001 From: Heiko Stübner Date: Thu, 28 Aug 2014 12:36:04 -0700 Subject: regulator: core: Add REGULATOR_EVENT_PRE_VOLTAGE_CHANGE (and ABORT) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In some cases we need to know when a regulator is about to be changed. Add a way for clients to be notified. Note that for set_voltage() we don't necessarily know what voltage we'll end up with, so we tell the client what the range will be so they can prepare. 
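For illustration, a minimal sketch (example_reg_event and example_nb are invented names, not part of the patch) of a consumer subscribing to the new events with a regulator already obtained from regulator_get():

	static int example_reg_event(struct notifier_block *nb,
				     unsigned long event, void *data)
	{
		struct pre_voltage_change_data *vdata;

		switch (event) {
		case REGULATOR_EVENT_PRE_VOLTAGE_CHANGE:
			vdata = data;
			/* prepare for any voltage in [vdata->min_uV, vdata->max_uV] */
			break;
		case REGULATOR_EVENT_ABORT_VOLTAGE_CHANGE:
			/* the change failed; data is the old voltage cast to (void *) */
			break;
		}
		return NOTIFY_OK;
	}

	static struct notifier_block example_nb = {
		.notifier_call = example_reg_event,
	};

	/* after regulator_get(): */
	ret = regulator_register_notifier(reg, &example_nb);

A handler that returns a value with NOTIFY_STOP_MASK set from PRE_VOLTAGE_CHANGE makes the core abort the change with -EINVAL, so a client can veto a voltage it cannot tolerate.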
Signed-off-by: Heiko Stübner Signed-off-by: Doug Anderson Signed-off-by: Mark Brown --- drivers/regulator/core.c | 63 +++++++++++++++++++++++++++++++++----- include/linux/regulator/consumer.h | 20 ++++++++++++ 2 files changed, 76 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index a3c3785901f5..dabc8e8862c8 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -102,7 +102,7 @@ static int _regulator_disable(struct regulator_dev *rdev); static int _regulator_get_voltage(struct regulator_dev *rdev); static int _regulator_get_current_limit(struct regulator_dev *rdev); static unsigned int _regulator_get_mode(struct regulator_dev *rdev); -static void _notifier_call_chain(struct regulator_dev *rdev, +static int _notifier_call_chain(struct regulator_dev *rdev, unsigned long event, void *data); static int _regulator_do_set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV); @@ -2369,6 +2369,55 @@ int regulator_is_supported_voltage(struct regulator *regulator, } EXPORT_SYMBOL_GPL(regulator_is_supported_voltage); +static int _regulator_call_set_voltage(struct regulator_dev *rdev, + int min_uV, int max_uV, + unsigned *selector) +{ + struct pre_voltage_change_data data; + int ret; + + data.old_uV = _regulator_get_voltage(rdev); + data.min_uV = min_uV; + data.max_uV = max_uV; + ret = _notifier_call_chain(rdev, REGULATOR_EVENT_PRE_VOLTAGE_CHANGE, + &data); + if (ret & NOTIFY_STOP_MASK) + return -EINVAL; + + ret = rdev->desc->ops->set_voltage(rdev, min_uV, max_uV, selector); + if (ret >= 0) + return ret; + + _notifier_call_chain(rdev, REGULATOR_EVENT_ABORT_VOLTAGE_CHANGE, + (void *)data.old_uV); + + return ret; +} + +static int _regulator_call_set_voltage_sel(struct regulator_dev *rdev, + int uV, unsigned selector) +{ + struct pre_voltage_change_data data; + int ret; + + data.old_uV = _regulator_get_voltage(rdev); + data.min_uV = uV; + data.max_uV = uV; + ret = _notifier_call_chain(rdev, REGULATOR_EVENT_PRE_VOLTAGE_CHANGE, + &data); + if (ret & NOTIFY_STOP_MASK) + return -EINVAL; + + ret = rdev->desc->ops->set_voltage_sel(rdev, selector); + if (ret >= 0) + return ret; + + _notifier_call_chain(rdev, REGULATOR_EVENT_ABORT_VOLTAGE_CHANGE, + (void *)data.old_uV); + + return ret; +} + static int _regulator_do_set_voltage(struct regulator_dev *rdev, int min_uV, int max_uV) { @@ -2396,8 +2445,8 @@ static int _regulator_do_set_voltage(struct regulator_dev *rdev, } if (rdev->desc->ops->set_voltage) { - ret = rdev->desc->ops->set_voltage(rdev, min_uV, max_uV, - &selector); + ret = _regulator_call_set_voltage(rdev, min_uV, max_uV, + &selector); if (ret >= 0) { if (rdev->desc->ops->list_voltage) @@ -2432,8 +2481,8 @@ static int _regulator_do_set_voltage(struct regulator_dev *rdev, if (old_selector == selector) ret = 0; else - ret = rdev->desc->ops->set_voltage_sel( - rdev, ret); + ret = _regulator_call_set_voltage_sel( + rdev, best_val, selector); } else { ret = -EINVAL; } @@ -3079,11 +3128,11 @@ EXPORT_SYMBOL_GPL(regulator_unregister_notifier); /* notify regulator consumers and downstream regulator consumers. * Note mutex must be held by caller. 
*/ -static void _notifier_call_chain(struct regulator_dev *rdev, +static int _notifier_call_chain(struct regulator_dev *rdev, unsigned long event, void *data) { /* call rdev chain first */ - blocking_notifier_call_chain(&rdev->notifier, event, data); + return blocking_notifier_call_chain(&rdev->notifier, event, data); } /** diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index f8a8733068a7..d347c805f923 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -93,7 +93,12 @@ struct regmap; * OVER_TEMP Regulator over temp. * FORCE_DISABLE Regulator forcibly shut down by software. * VOLTAGE_CHANGE Regulator voltage changed. + * Data passed is old voltage cast to (void *). * DISABLE Regulator was disabled. + * PRE_VOLTAGE_CHANGE Regulator is about to have voltage changed. + * Data passed is "struct pre_voltage_change_data" + * ABORT_VOLTAGE_CHANGE Regulator voltage change failed for some reason. + * Data passed is old voltage cast to (void *). * * NOTE: These events can be OR'ed together when passed into handler. */ @@ -106,6 +111,21 @@ struct regmap; #define REGULATOR_EVENT_FORCE_DISABLE 0x20 #define REGULATOR_EVENT_VOLTAGE_CHANGE 0x40 #define REGULATOR_EVENT_DISABLE 0x80 +#define REGULATOR_EVENT_PRE_VOLTAGE_CHANGE 0x100 +#define REGULATOR_EVENT_ABORT_VOLTAGE_CHANGE 0x200 + +/** + * struct pre_voltage_change_data - Data sent with PRE_VOLTAGE_CHANGE event + * + * @old_uV: Current voltage before change. + * @min_uV: Min voltage we'll change to. + * @max_uV: Max voltage we'll change to. + */ +struct pre_voltage_change_data { + unsigned long old_uV; + unsigned long min_uV; + unsigned long max_uV; +}; struct regulator; -- cgit v1.2.3 From 656473003bc7e056c3bbd4a4d9832dad01e86f76 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 29 Aug 2014 14:01:17 +0200 Subject: KVM: forward declare structs in kvm_types.h Opaque KVM structs are useful for prototypes in asm/kvm_host.h, to avoid "'struct foo' declared inside parameter list" warnings (and consequent breakage due to conflicting types). Move them from individual files to a generic place in linux/kvm_types.h. 
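For illustration, a minimal sketch (example_arch_vcpu_op is an invented name, not from the patch) of why the forward declarations matter to an arch header:

	/* hypothetical asm/kvm_host.h fragment */
	#include <linux/kvm_types.h>	/* forward-declares struct kvm_vcpu etc. */

	/*
	 * Without a prior declaration, this prototype would warn
	 * "'struct kvm_vcpu' declared inside parameter list" and the struct
	 * type would be scoped to the prototype alone, conflicting with the
	 * real definition encountered later.
	 */
	int example_arch_vcpu_op(struct kvm_vcpu *vcpu);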
Signed-off-by: Paolo Bonzini --- arch/arm/include/asm/kvm_host.h | 7 ++----- arch/arm64/include/asm/kvm_host.h | 6 ++---- arch/ia64/include/asm/kvm_host.h | 3 --- arch/mips/include/asm/kvm_host.h | 5 ----- arch/powerpc/include/asm/kvm_host.h | 5 ----- arch/s390/include/asm/kvm_host.h | 5 +++-- arch/x86/include/asm/kvm_host.h | 4 ---- include/linux/kvm_host.h | 6 ------ include/linux/kvm_types.h | 14 ++++++++++++++ 9 files changed, 21 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index 6dfb404f6c46..4843397b812c 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -19,6 +19,8 @@ #ifndef __ARM_KVM_HOST_H__ #define __ARM_KVM_HOST_H__ +#include +#include #include #include #include @@ -40,7 +42,6 @@ #include -struct kvm_vcpu; u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); int kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); @@ -149,20 +150,17 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -struct kvm_vcpu_init; int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, const struct kvm_vcpu_init *init); int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); -struct kvm_one_reg; int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); u64 kvm_call_hyp(void *hypfn, ...); void force_vm_exit(const cpumask_t *mask); #define KVM_ARCH_WANT_MMU_NOTIFIER -struct kvm; int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); @@ -187,7 +185,6 @@ struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu); -struct kvm_one_reg; int kvm_arm_coproc_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); int kvm_arm_coproc_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index e10c45a578e3..766147baae0b 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -22,6 +22,8 @@ #ifndef __ARM64_KVM_HOST_H__ #define __ARM64_KVM_HOST_H__ +#include +#include #include #include #include @@ -41,7 +43,6 @@ #define KVM_VCPU_MAX_FEATURES 3 -struct kvm_vcpu; int kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); int kvm_arch_dev_ioctl_check_extension(long ext); @@ -164,18 +165,15 @@ struct kvm_vcpu_stat { u32 halt_wakeup; }; -struct kvm_vcpu_init; int kvm_vcpu_set_target(struct kvm_vcpu *vcpu, const struct kvm_vcpu_init *init); int kvm_vcpu_preferred_target(struct kvm_vcpu_init *init); unsigned long kvm_arm_num_regs(struct kvm_vcpu *vcpu); int kvm_arm_copy_reg_indices(struct kvm_vcpu *vcpu, u64 __user *indices); -struct kvm_one_reg; int kvm_arm_get_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); int kvm_arm_set_reg(struct kvm_vcpu *vcpu, const struct kvm_one_reg *reg); #define KVM_ARCH_WANT_MMU_NOTIFIER -struct kvm; int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); diff --git a/arch/ia64/include/asm/kvm_host.h b/arch/ia64/include/asm/kvm_host.h index db95f570705f..fccc09d04649 100644 --- 
a/arch/ia64/include/asm/kvm_host.h +++ b/arch/ia64/include/asm/kvm_host.h @@ -234,9 +234,6 @@ struct kvm_vm_data { #define KVM_REQ_PTC_G 32 #define KVM_REQ_RESUME 33 -struct kvm; -struct kvm_vcpu; - struct kvm_mmio_req { uint64_t addr; /* physical address */ uint64_t size; /* size in bytes */ diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 7a3fc67bd7f9..b93bc80ed7e7 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -96,11 +96,6 @@ #define CAUSEB_DC 27 #define CAUSEF_DC (_ULCAST_(1) << 27) -struct kvm; -struct kvm_run; -struct kvm_vcpu; -struct kvm_interrupt; - extern atomic_t kvm_mips_instance; extern pfn_t(*kvm_mips_gfn_to_pfn) (struct kvm *kvm, gfn_t gfn); extern void (*kvm_mips_release_pfn_clean) (pfn_t pfn); diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 98d9dd50d063..0e597283c5c6 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -53,7 +53,6 @@ #define KVM_ARCH_WANT_MMU_NOTIFIER -struct kvm; extern int kvm_unmap_hva(struct kvm *kvm, unsigned long hva); extern int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); @@ -76,10 +75,6 @@ extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); /* Physical Address Mask - allowed range of real mode RAM access */ #define KVM_PAM 0x0fffffffffffffffULL -struct kvm; -struct kvm_run; -struct kvm_vcpu; - struct lppaca; struct slb_shadow; struct dtl_entry; diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 773bef7614d8..d71291d3fb6f 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -13,8 +13,11 @@ #ifndef ASM_KVM_HOST_H #define ASM_KVM_HOST_H + +#include #include #include +#include #include #include #include @@ -431,8 +434,6 @@ static inline bool kvm_is_error_hva(unsigned long addr) } #define ASYNC_PF_PER_VCPU 64 -struct kvm_vcpu; -struct kvm_async_pf; struct kvm_arch_async_pf { unsigned long pfault_token; }; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index ac0f90e26a0b..567fface45f8 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -99,10 +99,6 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) #define ASYNC_PF_PER_VCPU 64 -struct kvm_vcpu; -struct kvm; -struct kvm_async_pf; - enum kvm_reg { VCPU_REGS_RAX = 0, VCPU_REGS_RCX = 1, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index ebd723676633..e1cb915a1096 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -140,8 +140,6 @@ static inline bool is_error_page(struct page *page) #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -struct kvm; -struct kvm_vcpu; extern struct kmem_cache *kvm_vcpu_cache; extern spinlock_t kvm_lock; @@ -325,8 +323,6 @@ struct kvm_kernel_irq_routing_entry { struct hlist_node link; }; -struct kvm_irq_routing_table; - #ifndef KVM_PRIVATE_MEM_SLOTS #define KVM_PRIVATE_MEM_SLOTS 0 #endif @@ -1036,8 +1032,6 @@ static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) extern bool kvm_rebooting; -struct kvm_device_ops; - struct kvm_device { struct kvm_device_ops *ops; struct kvm *kvm; diff --git a/include/linux/kvm_types.h b/include/linux/kvm_types.h index b0bcce0ddc95..b606bb689a3e 100644 --- a/include/linux/kvm_types.h +++ b/include/linux/kvm_types.h @@ -17,6 +17,20 @@ #ifndef __KVM_TYPES_H__ #define __KVM_TYPES_H__ +struct 
kvm; +struct kvm_async_pf; +struct kvm_device_ops; +struct kvm_interrupt; +struct kvm_irq_routing_table; +struct kvm_memory_slot; +struct kvm_one_reg; +struct kvm_run; +struct kvm_userspace_memory_region; +struct kvm_vcpu; +struct kvm_vcpu_init; + +enum kvm_mr_change; + +#include /* -- cgit v1.2.3 From 13a34e067eab24fec882e1834fbf2cc31911d474 Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Thu, 28 Aug 2014 15:13:03 +0200 Subject: KVM: remove garbage arg to *hardware_{en,dis}able MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In the beginning was on_each_cpu(), which required an unused argument to kvm_arch_ops.hardware_{en,dis}able, but this was soon forgotten. Remove unnecessary arguments that stem from this. Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/arm/include/asm/kvm_host.h | 2 +- arch/arm/kvm/arm.c | 2 +- arch/arm64/include/asm/kvm_host.h | 2 +- arch/ia64/kvm/kvm-ia64.c | 4 ++-- arch/mips/include/asm/kvm_host.h | 2 +- arch/mips/kvm/mips.c | 2 +- arch/powerpc/include/asm/kvm_host.h | 2 +- arch/powerpc/kvm/powerpc.c | 2 +- arch/s390/include/asm/kvm_host.h | 2 +- arch/s390/kvm/kvm-s390.c | 2 +- arch/x86/include/asm/kvm_host.h | 4 ++-- arch/x86/kvm/svm.c | 4 ++-- arch/x86/kvm/vmx.c | 4 ++-- arch/x86/kvm/x86.c | 12 ++++++------ include/linux/kvm_host.h | 4 ++-- virt/kvm/kvm_main.c | 4 ++-- 16 files changed, 27 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index aea259e9431f..032a8538318a 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -230,7 +230,7 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic) int kvm_perf_init(void); int kvm_perf_teardown(void); -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 132bb0d9c5ad..005a7b5fd0aa 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -87,7 +87,7 @@ struct kvm_vcpu __percpu **kvm_get_running_vcpus(void) return &kvm_arm_running_vcpu; } -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { return 0; } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index b5045e3e05f8..be9970a59497 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -242,7 +242,7 @@ static inline void vgic_arch_setup(const struct vgic_params *vgic) } } -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_vcpu_uninit(struct kvm_vcpu *vcpu) {} diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c index 5e14dcaf844e..ec6b9acb6bea 100644 --- a/arch/ia64/kvm/kvm-ia64.c +++ b/arch/ia64/kvm/kvm-ia64.c @@ -125,7 +125,7 @@ long ia64_pal_vp_create(u64 *vpd, u64 *host_iva, u64 *opt_handler) static DEFINE_SPINLOCK(vp_lock); -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { long status; long tmp_base; @@ -160,7 +160,7 @@ int kvm_arch_hardware_enable(void *garbage) return 0; } -void kvm_arch_hardware_disable(void *garbage)
+void kvm_arch_hardware_disable(void) { long status; diff --git a/arch/mips/include/asm/kvm_host.h b/arch/mips/include/asm/kvm_host.h index 0b24d6622ec1..f2c249796ea8 100644 --- a/arch/mips/include/asm/kvm_host.h +++ b/arch/mips/include/asm/kvm_host.h @@ -762,7 +762,7 @@ extern int kvm_mips_trans_mtc0(uint32_t inst, uint32_t *opc, extern void kvm_mips_dump_stats(struct kvm_vcpu *vcpu); extern unsigned long kvm_mips_get_ramsize(struct kvm *kvm); -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_free_memslot(struct kvm *kvm, diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 0ec7490d70bd..e3b21e51ff7e 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -77,7 +77,7 @@ int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) return 1; } -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { return 0; } diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 237cc0cc80a2..604000882352 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -682,7 +682,7 @@ struct kvm_vcpu_arch { #define __KVM_HAVE_ARCH_WQP #define __KVM_HAVE_CREATE_DEVICE -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_hardware_unsetup(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} static inline void kvm_arch_memslots_updated(struct kvm *kvm) {} diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 72c3fc085207..da505237a664 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -384,7 +384,7 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, } EXPORT_SYMBOL_GPL(kvmppc_ld); -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { return 0; } diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index f6dd90684b97..a76a124dff48 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -452,7 +452,7 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, extern int sie64a(struct kvm_s390_sie_block *, u64 *); extern char sie_exit; -static inline void kvm_arch_hardware_disable(void *garbage) {} +static inline void kvm_arch_hardware_disable(void) {} static inline void kvm_arch_check_processor_compat(void *rtn) {} static inline void kvm_arch_exit(void) {} static inline void kvm_arch_sync_events(struct kvm *kvm) {} diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index b8fe1ae777db..628e992eeded 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -100,7 +100,7 @@ int test_vfacility(unsigned long nr) } /* Section: not file related */ -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { /* every s390 is virtualization enabled ;-) */ return 0; diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 567fface45f8..73e4149eda33 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -661,8 +661,8 @@ struct msr_data { struct kvm_x86_ops { int (*cpu_has_kvm_support)(void); /* __init */ int (*disabled_by_bios)(void); /* __init */ - int (*hardware_enable)(void *dummy); - void (*hardware_disable)(void *dummy); + int 
(*hardware_enable)(void); + void (*hardware_disable)(void); void (*check_processor_compatibility)(void *rtn); int (*hardware_setup)(void); /* __init */ void (*hardware_unsetup)(void); /* __exit */ diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 7cd230e55118..8ef704037370 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -622,7 +622,7 @@ static int has_svm(void) return 1; } -static void svm_hardware_disable(void *garbage) +static void svm_hardware_disable(void) { /* Make sure we clean up behind us */ if (static_cpu_has(X86_FEATURE_TSCRATEMSR)) @@ -633,7 +633,7 @@ static void svm_hardware_disable(void *garbage) amd_pmu_disable_virt(); } -static int svm_hardware_enable(void *garbage) +static int svm_hardware_enable(void) { struct svm_cpu_data *sd; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d70550d0bcff..671ca5edc709 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2728,7 +2728,7 @@ static void kvm_cpu_vmxon(u64 addr) : "memory", "cc"); } -static int hardware_enable(void *garbage) +static int hardware_enable(void) { int cpu = raw_smp_processor_id(); u64 phys_addr = __pa(per_cpu(vmxarea, cpu)); @@ -2792,7 +2792,7 @@ static void kvm_cpu_vmxoff(void) asm volatile (__ex(ASM_VMX_VMXOFF) : : : "cc"); } -static void hardware_disable(void *garbage) +static void hardware_disable(void) { if (vmm_exclusive) { vmclear_local_loaded_vmcss(); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c10408ef9ab1..a375dfc42f6a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -246,7 +246,7 @@ void kvm_set_shared_msr(unsigned slot, u64 value, u64 mask) } EXPORT_SYMBOL_GPL(kvm_set_shared_msr); -static void drop_user_return_notifiers(void *ignore) +static void drop_user_return_notifiers(void) { unsigned int cpu = smp_processor_id(); struct kvm_shared_msrs *smsr = per_cpu_ptr(shared_msrs, cpu); @@ -6959,7 +6959,7 @@ void kvm_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, unsigned int vector) kvm_rip_write(vcpu, 0); } -int kvm_arch_hardware_enable(void *garbage) +int kvm_arch_hardware_enable(void) { struct kvm *kvm; struct kvm_vcpu *vcpu; @@ -6970,7 +6970,7 @@ int kvm_arch_hardware_enable(void *garbage) bool stable, backwards_tsc = false; kvm_shared_msr_cpu_online(); - ret = kvm_x86_ops->hardware_enable(garbage); + ret = kvm_x86_ops->hardware_enable(); if (ret != 0) return ret; @@ -7051,10 +7051,10 @@ int kvm_arch_hardware_enable(void *garbage) return 0; } -void kvm_arch_hardware_disable(void *garbage) +void kvm_arch_hardware_disable(void) { - kvm_x86_ops->hardware_disable(garbage); - drop_user_return_notifiers(garbage); + kvm_x86_ops->hardware_disable(); + drop_user_return_notifiers(); } int kvm_arch_hardware_setup(void) diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index e1cb915a1096..e098dce179df 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -630,8 +630,8 @@ int kvm_arch_vcpu_setup(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_postcreate(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_destroy(struct kvm_vcpu *vcpu); -int kvm_arch_hardware_enable(void *garbage); -void kvm_arch_hardware_disable(void *garbage); +int kvm_arch_hardware_enable(void); +void kvm_arch_hardware_disable(void); int kvm_arch_hardware_setup(void); void kvm_arch_hardware_unsetup(void); void kvm_arch_check_processor_compat(void *rtn); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 1d03967def40..7176929a4cda 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -2669,7 +2669,7 @@ static void hardware_enable_nolock(void *junk) 
cpumask_set_cpu(cpu, cpus_hardware_enabled); - r = kvm_arch_hardware_enable(NULL); + r = kvm_arch_hardware_enable(); if (r) { cpumask_clear_cpu(cpu, cpus_hardware_enabled); @@ -2694,7 +2694,7 @@ static void hardware_disable_nolock(void *junk) if (!cpumask_test_cpu(cpu, cpus_hardware_enabled)) return; cpumask_clear_cpu(cpu, cpus_hardware_enabled); - kvm_arch_hardware_disable(NULL); + kvm_arch_hardware_disable(); } static void hardware_disable(void) -- cgit v1.2.3 From 10c51b56232d24f150e39884a9e749fd99cbc60c Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 27 Aug 2014 11:11:27 +0200 Subject: net: add skb_get_tx_queue() helper Replace occurrences of skb_get_queue_mapping() and follow-up netdev_get_tx_queue() with an actual helper function. Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 ++++++ net/core/netpoll.c | 2 +- net/core/pktgen.c | 4 +--- net/packet/af_packet.c | 4 +--- net/sched/sch_generic.c | 6 ++++-- 5 files changed, 13 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 429801370d0c..dfc1d8b8bd0f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1747,6 +1747,12 @@ struct netdev_queue *netdev_get_tx_queue(const struct net_device *dev, return &dev->_tx[index]; } +static inline struct netdev_queue *skb_get_tx_queue(const struct net_device *dev, + const struct sk_buff *skb) +{ + return netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); +} + static inline void netdev_for_each_tx_queue(struct net_device *dev, void (*f)(struct net_device *, struct netdev_queue *, diff --git a/net/core/netpoll.c b/net/core/netpoll.c index a5ad06828d67..12b1df976562 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -115,7 +115,7 @@ static void queue_process(struct work_struct *work) continue; } - txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + txq = skb_get_tx_queue(dev, skb); local_irq_save(flags); HARD_TX_LOCK(dev, txq, smp_processor_id()); diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 83e2b4b19eb7..d81b540096c3 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3286,7 +3286,6 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) { struct net_device *odev = pkt_dev->odev; struct netdev_queue *txq; - u16 queue_map; int ret; /* If device is offline, then don't send */ @@ -3324,8 +3323,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) if (pkt_dev->delay && pkt_dev->last_ok) spin(pkt_dev, pkt_dev->next_tx); - queue_map = skb_get_queue_mapping(pkt_dev->skb); - txq = netdev_get_tx_queue(odev, queue_map); + txq = skb_get_tx_queue(odev, pkt_dev->skb); local_bh_disable(); diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 0dfa990d4eaa..b7a7f5a721bd 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -243,7 +243,6 @@ static int packet_direct_xmit(struct sk_buff *skb) netdev_features_t features; struct netdev_queue *txq; int ret = NETDEV_TX_BUSY; - u16 queue_map; if (unlikely(!netif_running(dev) || !netif_carrier_ok(dev))) @@ -254,8 +253,7 @@ static int packet_direct_xmit(struct sk_buff *skb) __skb_linearize(skb)) goto drop; - queue_map = skb_get_queue_mapping(skb); - txq = netdev_get_tx_queue(dev, queue_map); + txq = skb_get_tx_queue(dev, skb); local_bh_disable(); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index fc04fe93c2da..05b3f5d104af 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -63,7 +63,7 @@ static inline struct sk_buff
*dequeue_skb(struct Qdisc *q) if (unlikely(skb)) { /* check the reason of requeuing without tx lock first */ - txq = netdev_get_tx_queue(txq->dev, skb_get_queue_mapping(skb)); + txq = skb_get_tx_queue(txq->dev, skb); if (!netif_xmit_frozen_or_stopped(txq)) { q->gso_skb = NULL; q->q.qlen--; @@ -183,10 +183,12 @@ static inline int qdisc_restart(struct Qdisc *q) skb = dequeue_skb(q); if (unlikely(!skb)) return 0; + WARN_ON_ONCE(skb_dst_is_noref(skb)); + root_lock = qdisc_lock(q); dev = qdisc_dev(q); - txq = netdev_get_tx_queue(dev, skb_get_queue_mapping(skb)); + txq = skb_get_tx_queue(dev, skb); return sch_direct_xmit(skb, q, dev, txq, root_lock); } -- cgit v1.2.3 From 18fe8db5f2b53e4ac67b47048f24f50c57a2a759 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 28 Aug 2014 13:44:31 +0200 Subject: include/linux/cycx_x25.h: Remove unused header The header file include/linux/cycx_x25.h does not seem to be used anywhere. It was orphaned by 6fcdf4facb "wanrouter: delete now orphaned header content, files/drivers". Remove it. Signed-off-by: Rasmus Villemoes Signed-off-by: David S. Miller --- include/linux/cycx_x25.h | 125 ----------------------------------------------- 1 file changed, 125 deletions(-) delete mode 100644 include/linux/cycx_x25.h (limited to 'include/linux') diff --git a/include/linux/cycx_x25.h b/include/linux/cycx_x25.h deleted file mode 100644 index 362bf19d6cf1..000000000000 --- a/include/linux/cycx_x25.h +++ /dev/null @@ -1,125 +0,0 @@ -#ifndef _CYCX_X25_H -#define _CYCX_X25_H -/* -* cycx_x25.h Cyclom X.25 firmware API definitions. -* -* Author: Arnaldo Carvalho de Melo -* -* Copyright: (c) 1998-2003 Arnaldo Carvalho de Melo -* -* Based on sdla_x25.h by Gene Kozin <74604.152@compuserve.com> -* -* This program is free software; you can redistribute it and/or -* modify it under the terms of the GNU General Public License -* as published by the Free Software Foundation; either version -* 2 of the License, or (at your option) any later version. -* ============================================================================ -* 2000/04/02 acme dprintk and cycx_debug -* 1999/01/03 acme judicious use of data types -* 1999/01/02 acme #define X25_ACK_N3 0x4411 -* 1998/12/28 acme cleanup: lot'o'things removed -* commands listed, -* TX25Cmd & TX25Config structs -* typedef'ed -*/ -#ifndef PACKED -#define PACKED __attribute__((packed)) -#endif - -/* X.25 shared memory layout. */ -#define X25_MBOX_OFFS 0x300 /* general mailbox block */ -#define X25_RXMBOX_OFFS 0x340 /* receive mailbox */ - -/* Debug */ -#define dprintk(level, format, a...) if (cycx_debug >= level) printk(format, ##a) - -extern unsigned int cycx_debug; - -/* Data Structures */ -/* X.25 Command Block. */ -struct cycx_x25_cmd { - u16 command; - u16 link; /* values: 0 or 1 */ - u16 len; /* values: 0 thru 0x205 (517) */ - u32 buf; -} PACKED; - -/* Defines for the 'command' field. 
*/ -#define X25_CONNECT_REQUEST 0x4401 -#define X25_CONNECT_RESPONSE 0x4402 -#define X25_DISCONNECT_REQUEST 0x4403 -#define X25_DISCONNECT_RESPONSE 0x4404 -#define X25_DATA_REQUEST 0x4405 -#define X25_ACK_TO_VC 0x4406 -#define X25_INTERRUPT_RESPONSE 0x4407 -#define X25_CONFIG 0x4408 -#define X25_CONNECT_INDICATION 0x4409 -#define X25_CONNECT_CONFIRM 0x440A -#define X25_DISCONNECT_INDICATION 0x440B -#define X25_DISCONNECT_CONFIRM 0x440C -#define X25_DATA_INDICATION 0x440E -#define X25_INTERRUPT_INDICATION 0x440F -#define X25_ACK_FROM_VC 0x4410 -#define X25_ACK_N3 0x4411 -#define X25_CONNECT_COLLISION 0x4413 -#define X25_N3WIN 0x4414 -#define X25_LINE_ON 0x4415 -#define X25_LINE_OFF 0x4416 -#define X25_RESET_REQUEST 0x4417 -#define X25_LOG 0x4500 -#define X25_STATISTIC 0x4600 -#define X25_TRACE 0x4700 -#define X25_N2TRACEXC 0x4702 -#define X25_N3TRACEXC 0x4703 - -/** - * struct cycx_x25_config - cyclom2x x25 firmware configuration - * @link - link number - * @speed - line speed - * @clock - internal/external - * @n2 - # of level 2 retransm.(values: 1 thru FF) - * @n2win - level 2 window (values: 1 thru 7) - * @n3win - level 3 window (values: 1 thru 7) - * @nvc - # of logical channels (values: 1 thru 64) - * @pktlen - level 3 packet length - log base 2 of size - * @locaddr - my address - * @remaddr - remote address - * @t1 - time, in seconds - * @t2 - time, in seconds - * @t21 - time, in seconds - * @npvc - # of permanent virt. circuits (1 thru nvc) - * @t23 - time, in seconds - * @flags - see dosx25.doc, in portuguese, for details - */ -struct cycx_x25_config { - u8 link; - u8 speed; - u8 clock; - u8 n2; - u8 n2win; - u8 n3win; - u8 nvc; - u8 pktlen; - u8 locaddr; - u8 remaddr; - u16 t1; - u16 t2; - u8 t21; - u8 npvc; - u8 t23; - u8 flags; -} PACKED; - -struct cycx_x25_stats { - u16 rx_crc_errors; - u16 rx_over_errors; - u16 n2_tx_frames; - u16 n2_rx_frames; - u16 tx_timeouts; - u16 rx_timeouts; - u16 n3_tx_packets; - u16 n3_rx_packets; - u16 tx_aborts; - u16 rx_aborts; -} PACKED; -#endif /* _CYCX_X25_H */ -- cgit v1.2.3 From fbd74659d4513816a6249b0db491e8d831803520 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 28 Aug 2014 13:44:32 +0200 Subject: include/linux/i82593.h: Remove unused header The header file include/linux/i82593.h does not seem to be used anywhere. It was orphaned by 8a594170 "drivers/net: delete intel i825xx based znet notebook driver". Remove it. Signed-off-by: Rasmus Villemoes Signed-off-by: David S. Miller --- include/linux/i82593.h | 229 ------------------------------------------------- 1 file changed, 229 deletions(-) delete mode 100644 include/linux/i82593.h (limited to 'include/linux') diff --git a/include/linux/i82593.h b/include/linux/i82593.h deleted file mode 100644 index afac5c7a323d..000000000000 --- a/include/linux/i82593.h +++ /dev/null @@ -1,229 +0,0 @@ -/* - * Definitions for Intel 82593 CSMA/CD Core LAN Controller - * The definitions are taken from the 1992 users manual with Intel - * order number 297125-001. - * - * /usr/src/pc/RCS/i82593.h,v 1.1 1996/07/17 15:23:12 root Exp - * - * Copyright 1994, Anders Klemets - * - * HISTORY - * i82593.h,v - * Revision 1.4 2005/11/4 09:15:00 baroniunas - * Modified copyright with permission of author as follows: - * - * "If I82539.H is the only file with my copyright statement - * that is included in the Source Forge project, then you have - * my approval to change the copyright statement to be a GPL - * license, in the way you proposed on October 10." 
- * - * Revision 1.1 1996/07/17 15:23:12 root - * Initial revision - * - * Revision 1.3 1995/04/05 15:13:58 adj - * Initial alpha release - * - * Revision 1.2 1994/06/16 23:57:31 klemets - * Mirrored all the fields in the configuration block. - * - * Revision 1.1 1994/06/02 20:25:34 klemets - * Initial revision - * - * - */ -#ifndef _I82593_H -#define _I82593_H - -/* Intel 82593 CSMA/CD Core LAN Controller */ - -/* Port 0 Command Register definitions */ - -/* Execution operations */ -#define OP0_NOP 0 /* CHNL = 0 */ -#define OP0_SWIT_TO_PORT_1 0 /* CHNL = 1 */ -#define OP0_IA_SETUP 1 -#define OP0_CONFIGURE 2 -#define OP0_MC_SETUP 3 -#define OP0_TRANSMIT 4 -#define OP0_TDR 5 -#define OP0_DUMP 6 -#define OP0_DIAGNOSE 7 -#define OP0_TRANSMIT_NO_CRC 9 -#define OP0_RETRANSMIT 12 -#define OP0_ABORT 13 -/* Reception operations */ -#define OP0_RCV_ENABLE 8 -#define OP0_RCV_DISABLE 10 -#define OP0_STOP_RCV 11 -/* Status pointer control operations */ -#define OP0_FIX_PTR 15 /* CHNL = 1 */ -#define OP0_RLS_PTR 15 /* CHNL = 0 */ -#define OP0_RESET 14 - -#define CR0_CHNL (1 << 4) /* 0=Channel 0, 1=Channel 1 */ -#define CR0_STATUS_0 0x00 -#define CR0_STATUS_1 0x20 -#define CR0_STATUS_2 0x40 -#define CR0_STATUS_3 0x60 -#define CR0_INT_ACK (1 << 7) /* 0=No ack, 1=acknowledge */ - -/* Port 0 Status Register definitions */ - -#define SR0_NO_RESULT 0 /* dummy */ -#define SR0_EVENT_MASK 0x0f -#define SR0_IA_SETUP_DONE 1 -#define SR0_CONFIGURE_DONE 2 -#define SR0_MC_SETUP_DONE 3 -#define SR0_TRANSMIT_DONE 4 -#define SR0_TDR_DONE 5 -#define SR0_DUMP_DONE 6 -#define SR0_DIAGNOSE_PASSED 7 -#define SR0_TRANSMIT_NO_CRC_DONE 9 -#define SR0_RETRANSMIT_DONE 12 -#define SR0_EXECUTION_ABORTED 13 -#define SR0_END_OF_FRAME 8 -#define SR0_RECEPTION_ABORTED 10 -#define SR0_DIAGNOSE_FAILED 15 -#define SR0_STOP_REG_HIT 11 - -#define SR0_CHNL (1 << 4) -#define SR0_EXECUTION (1 << 5) -#define SR0_RECEPTION (1 << 6) -#define SR0_INTERRUPT (1 << 7) -#define SR0_BOTH_RX_TX (SR0_EXECUTION | SR0_RECEPTION) - -#define SR3_EXEC_STATE_MASK 0x03 -#define SR3_EXEC_IDLE 0 -#define SR3_TX_ABORT_IN_PROGRESS 1 -#define SR3_EXEC_ACTIVE 2 -#define SR3_ABORT_IN_PROGRESS 3 -#define SR3_EXEC_CHNL (1 << 2) -#define SR3_STP_ON_NO_RSRC (1 << 3) -#define SR3_RCVING_NO_RSRC (1 << 4) -#define SR3_RCV_STATE_MASK 0x60 -#define SR3_RCV_IDLE 0x00 -#define SR3_RCV_READY 0x20 -#define SR3_RCV_ACTIVE 0x40 -#define SR3_RCV_STOP_IN_PROG 0x60 -#define SR3_RCV_CHNL (1 << 7) - -/* Port 1 Command Register definitions */ - -#define OP1_NOP 0 -#define OP1_SWIT_TO_PORT_0 1 -#define OP1_INT_DISABLE 2 -#define OP1_INT_ENABLE 3 -#define OP1_SET_TS 5 -#define OP1_RST_TS 7 -#define OP1_POWER_DOWN 8 -#define OP1_RESET_RING_MNGMT 11 -#define OP1_RESET 14 -#define OP1_SEL_RST 15 - -#define CR1_STATUS_4 0x00 -#define CR1_STATUS_5 0x20 -#define CR1_STATUS_6 0x40 -#define CR1_STOP_REG_UPDATE (1 << 7) - -/* Receive frame status bits */ - -#define RX_RCLD (1 << 0) -#define RX_IA_MATCH (1 << 1) -#define RX_NO_AD_MATCH (1 << 2) -#define RX_NO_SFD (1 << 3) -#define RX_SRT_FRM (1 << 7) -#define RX_OVRRUN (1 << 8) -#define RX_ALG_ERR (1 << 10) -#define RX_CRC_ERR (1 << 11) -#define RX_LEN_ERR (1 << 12) -#define RX_RCV_OK (1 << 13) -#define RX_TYP_LEN (1 << 15) - -/* Transmit status bits */ - -#define TX_NCOL_MASK 0x0f -#define TX_FRTL (1 << 4) -#define TX_MAX_COL (1 << 5) -#define TX_HRT_BEAT (1 << 6) -#define TX_DEFER (1 << 7) -#define TX_UND_RUN (1 << 8) -#define TX_LOST_CTS (1 << 9) -#define TX_LOST_CRS (1 << 10) -#define TX_LTCOL (1 << 11) -#define TX_OK (1 << 13) -#define TX_COLL 
(1 << 15) - -struct i82593_conf_block { - u_char fifo_limit : 4, - forgnesi : 1, - fifo_32 : 1, - d6mod : 1, - throttle_enb : 1; - u_char throttle : 6, - cntrxint : 1, - contin : 1; - u_char addr_len : 3, - acloc : 1, - preamb_len : 2, - loopback : 2; - u_char lin_prio : 3, - tbofstop : 1, - exp_prio : 3, - bof_met : 1; - u_char : 4, - ifrm_spc : 4; - u_char : 5, - slottim_low : 3; - u_char slottim_hi : 3, - : 1, - max_retr : 4; - u_char prmisc : 1, - bc_dis : 1, - : 1, - crs_1 : 1, - nocrc_ins : 1, - crc_1632 : 1, - : 1, - crs_cdt : 1; - u_char cs_filter : 3, - crs_src : 1, - cd_filter : 3, - : 1; - u_char : 2, - min_fr_len : 6; - u_char lng_typ : 1, - lng_fld : 1, - rxcrc_xf : 1, - artx : 1, - sarec : 1, - tx_jabber : 1, /* why is this called max_len in the manual? */ - hash_1 : 1, - lbpkpol : 1; - u_char : 6, - fdx : 1, - : 1; - u_char dummy_6 : 6, /* supposed to be ones */ - mult_ia : 1, - dis_bof : 1; - u_char dummy_1 : 1, /* supposed to be one */ - tx_ifs_retrig : 2, - mc_all : 1, - rcv_mon : 2, - frag_acpt : 1, - tstrttrs : 1; - u_char fretx : 1, - runt_eop : 1, - hw_sw_pin : 1, - big_endn : 1, - syncrqs : 1, - sttlen : 1, - tx_eop : 1, - rx_eop : 1; - u_char rbuf_size : 5, - rcvstop : 1, - : 2; -}; - -#define I82593_MAX_MULTICAST_ADDRESSES 128 /* Hardware hashed filter */ - -#endif /* _I82593_H */ -- cgit v1.2.3 From 6fb7c3778f0fba0bad099c30e834c413c4f8bcb5 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 28 Aug 2014 13:44:33 +0200 Subject: include/linux/phonedev.h: Remove unused header The header file include/linux/phonedev.h does not seem to be used anywhere. It was orphaned by 7326446c "Staging: remove telephony drivers". Remove it. Signed-off-by: Rasmus Villemoes Signed-off-by: David S. Miller --- include/linux/phonedev.h | 25 ------------------------- 1 file changed, 25 deletions(-) delete mode 100644 include/linux/phonedev.h (limited to 'include/linux') diff --git a/include/linux/phonedev.h b/include/linux/phonedev.h deleted file mode 100644 index 4269de99e320..000000000000 --- a/include/linux/phonedev.h +++ /dev/null @@ -1,25 +0,0 @@ -#ifndef __LINUX_PHONEDEV_H -#define __LINUX_PHONEDEV_H - -#include - -#ifdef __KERNEL__ - -#include - -struct phone_device { - struct phone_device *next; - const struct file_operations *f_op; - int (*open) (struct phone_device *, struct file *); - int board; /* Device private index */ - int minor; -}; - -extern int phonedev_init(void); -#define PHONE_MAJOR 100 -extern int phone_register_device(struct phone_device *, int unit); -#define PHONE_UNIT_ANY -1 -extern void phone_unregister_device(struct phone_device *); - -#endif -#endif -- cgit v1.2.3 From de20fe8e2cc3c4ca13fdb529e6720d9d199333fe Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 27 Aug 2014 21:26:35 -0700 Subject: net: Allocate a new 16 bits for flags in skbuff Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/linux/skbuff.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index b69b7b512c06..3c9574c80933 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -598,6 +598,10 @@ struct sk_buff { __u32 reserved_tailroom; }; + kmemcheck_bitfield_begin(flags3); + /* 16 bit hole */ + kmemcheck_bitfield_end(flags3); + __be16 inner_protocol; __u16 inner_transport_header; __u16 inner_network_header; -- cgit v1.2.3 From 77cffe23c1f88835f6bd7b47bfa0c060c2969828 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 27 Aug 2014 21:26:46 -0700 Subject: net: Clarification of CHECKSUM_UNNECESSARY This patch: - Clarifies the specific requirements of devices returning CHECKSUM_UNNECESSARY (comments in skbuff.h). - Adds csum_level field to skbuff. This is used to express how many checksums are covered by CHECKSUM_UNNECESSARY (stores n - 1). This replaces the overloading of skb->encapsulation, that field is now only used to indicate inner headers are valid. - Change __skb_checksum_validate_needed to "consume" each checksum as indicated by csum_level as layers of the packet are parsed. - Remove skb_pop_rcv_encapsulation, no longer needed in the new csum_level model. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- drivers/net/vxlan.c | 2 -- include/linux/skbuff.h | 74 ++++++++++++++++++++++++++++++++++---------------- net/ipv4/gre_demux.c | 1 - 3 files changed, 51 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c index beb377b2d4b7..67527f3d3be2 100644 --- a/drivers/net/vxlan.c +++ b/drivers/net/vxlan.c @@ -1158,8 +1158,6 @@ static int vxlan_udp_encap_recv(struct sock *sk, struct sk_buff *skb) if (!vs) goto drop; - skb_pop_rcv_encapsulation(skb); - vs->rcv(vs, skb, vxh->vx_vni); return 0; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 3c9574c80933..c93b5859a772 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -47,11 +47,29 @@ * * The hardware you're dealing with doesn't calculate the full checksum * (as in CHECKSUM_COMPLETE), but it does parse headers and verify checksums - * for specific protocols e.g. TCP/UDP/SCTP, then, for such packets it will - * set CHECKSUM_UNNECESSARY if their checksums are okay. skb->csum is still - * undefined in this case though. It is a bad option, but, unfortunately, - * nowadays most vendors do this. Apparently with the secret goal to sell - * you new devices, when you will add new protocol to your host, f.e. IPv6 8) + * for specific protocols. For such packets it will set CHECKSUM_UNNECESSARY + * if their checksums are okay. skb->csum is still undefined in this case + * though. It is a bad option, but, unfortunately, nowadays most vendors do + * this. Apparently with the secret goal to sell you new devices, when you + * will add new protocol to your host, f.e. IPv6 8) + * + * CHECKSUM_UNNECESSARY is applicable to following protocols: + * TCP: IPv6 and IPv4. + * UDP: IPv4 and IPv6. A device may apply CHECKSUM_UNNECESSARY to a + * zero UDP checksum for either IPv4 or IPv6, the networking stack + * may perform further validation in this case. + * GRE: only if the checksum is present in the header. + * SCTP: indicates the CRC in SCTP header has been validated. + * + * skb->csum_level indicates the number of consecutive checksums found in + * the packet minus one that have been verified as CHECKSUM_UNNECESSARY.
+ * For instance if a device receives an IPv6->UDP->GRE->IPv4->TCP packet + * and a device is able to verify the checksums for UDP (possibly zero), + * GRE (checksum flag is set), and TCP-- skb->csum_level would be set to + * two. If the device were only able to verify the UDP checksum and not + * GRE, either because it doesn't support GRE checksum or because GRE + * checksum is bad, skb->csum_level would be set to zero (TCP checksum is + * not considered in this case). * * CHECKSUM_COMPLETE: * @@ -112,6 +130,9 @@ #define CHECKSUM_COMPLETE 2 #define CHECKSUM_PARTIAL 3 +/* Maximum value in skb->csum_level */ +#define SKB_MAX_CSUM_LEVEL 3 + #define SKB_DATA_ALIGN(X) ALIGN(X, SMP_CACHE_BYTES) #define SKB_WITH_OVERHEAD(X) \ ((X) - SKB_DATA_ALIGN(sizeof(struct skb_shared_info))) @@ -571,11 +592,7 @@ struct sk_buff { __u8 wifi_acked:1; __u8 no_fcs:1; __u8 head_frag:1; - /* Encapsulation protocol and NIC drivers should use - * this flag to indicate to each other if the skb contains - * encapsulated packet or not and maybe use the inner packet - * headers if needed - */ + /* Indicates the inner headers are valid in the skbuff. */ __u8 encapsulation:1; __u8 encap_hdr_csum:1; __u8 csum_valid:1; @@ -599,7 +616,8 @@ struct sk_buff { }; kmemcheck_bitfield_begin(flags3); - /* 16 bit hole */ + __u8 csum_level:2; + /* 14 bit hole */ kmemcheck_bitfield_end(flags3); __be16 inner_protocol; @@ -1866,18 +1884,6 @@ static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len) return pskb_may_pull(skb, skb_network_offset(skb) + len); } -static inline void skb_pop_rcv_encapsulation(struct sk_buff *skb) -{ - /* Only continue with checksum unnecessary if device indicated - * it is valid across encapsulation (skb->encapsulation was set). - */ - if (skb->ip_summed == CHECKSUM_UNNECESSARY && !skb->encapsulation) - skb->ip_summed = CHECKSUM_NONE; - - skb->encapsulation = 0; - skb->csum_valid = 0; -} - /* * CPUs often take a performance hit when accessing unaligned memory * locations. The actual performance hit varies, it can be small if the @@ -2798,6 +2804,27 @@ static inline __sum16 skb_checksum_complete(struct sk_buff *skb) 0 : __skb_checksum_complete(skb); } +static inline void __skb_decr_checksum_unnecessary(struct sk_buff *skb) +{ + if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + if (skb->csum_level == 0) + skb->ip_summed = CHECKSUM_NONE; + else + skb->csum_level--; + } +} + +static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb) +{ + if (skb->ip_summed == CHECKSUM_UNNECESSARY) { + if (skb->csum_level < SKB_MAX_CSUM_LEVEL) + skb->csum_level++; + } else if (skb->ip_summed == CHECKSUM_NONE) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + skb->csum_level = 0; + } +} + /* Check if we need to perform checksum complete validation.
 * * Returns true if checksum complete is needed, false otherwise @@ -2809,6 +2836,7 @@ static inline bool __skb_checksum_validate_needed(struct sk_buff *skb, { if (skb_csum_unnecessary(skb) || (zero_okay && !check)) { skb->csum_valid = 1; + __skb_decr_checksum_unnecessary(skb); return false; } diff --git a/net/ipv4/gre_demux.c b/net/ipv4/gre_demux.c index 7c1a8ff974dd..0485bf7f8f03 100644 --- a/net/ipv4/gre_demux.c +++ b/net/ipv4/gre_demux.c @@ -125,7 +125,6 @@ static int parse_gre_header(struct sk_buff *skb, struct tnl_ptk_info *tpi, *csum_err = true; return -EINVAL; } - skb_pop_rcv_encapsulation(skb); options++; } -- cgit v1.2.3
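To illustrate the csum_level convention introduced above: a device that verifies both the outer UDP checksum and the inner TCP checksum of a tunneled frame would advertise two checksums as in the sketch below. This is illustrative only, not code from this series; foo_rx_csum() is a hypothetical driver helper.

#include <linux/skbuff.h>

static void foo_rx_csum(struct sk_buff *skb)
{
	/* Hardware verified two checksums (outer UDP, inner TCP),
	 * so csum_level stores n - 1 = 1.
	 */
	skb->ip_summed = CHECKSUM_UNNECESSARY;
	skb->csum_level = 1;
}

The stack then consumes one level per validated header via __skb_decr_checksum_unnecessary(), falling back to CHECKSUM_NONE once all advertised checksums have been used.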
From 662880f4420340aad4f9a62a349c6c9d4faa1a5d Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Wed, 27 Aug 2014 21:26:56 -0700 Subject: net: Allow GRO to use and set levels of checksum unnecessary Allow GRO path to "consume" checksums provided in CHECKSUM_UNNECESSARY and to report new checksums verified for use in fallback to normal path. Change GRO checksum path to track csum_level using a csum_cnt field in NAPI_GRO_CB. On GRO initialization, if ip_summed is CHECKSUM_UNNECESSARY, set NAPI_GRO_CB(skb)->csum_cnt to skb->csum_level + 1. For each checksum verified, decrement NAPI_GRO_CB(skb)->csum_cnt while it is greater than zero. If a checksum is verified and NAPI_GRO_CB(skb)->csum_cnt == 0, we have verified a deeper checksum than originally indicated in the skbuff, so increment csum_level (or initialize to CHECKSUM_UNNECESSARY if ip_summed is CHECKSUM_NONE or CHECKSUM_COMPLETE). Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/netdevice.h | 26 ++++++++++++-------------- net/core/dev.c | 24 ++++++++++++++++-------- net/ipv4/gre_offload.c | 7 ++----- net/ipv4/udp_offload.c | 5 +++-- 4 files changed, 33 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index dfc1d8b8bd0f..456eb1fe51e8 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1883,8 +1883,8 @@ struct napi_gro_cb { /* GRO checksum is valid */ u8 csum_valid:1; - /* Number encapsulation layers crossed */ - u8 encapsulation; + /* Number of checksums via CHECKSUM_UNNECESSARY */ + u8 csum_cnt:3; /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; @@ -2179,8 +2179,7 @@ static inline bool __skb_gro_checksum_validate_needed(struct sk_buff *skb, __sum16 check) { return (skb->ip_summed != CHECKSUM_PARTIAL && - (skb->ip_summed != CHECKSUM_UNNECESSARY || - (NAPI_GRO_CB(skb)->encapsulation > skb->encapsulation)) && + NAPI_GRO_CB(skb)->csum_cnt == 0 && (!zero_okay || check)); } @@ -2196,18 +2195,17 @@ static inline __sum16 __skb_gro_checksum_validate_complete(struct sk_buff *skb, return __skb_gro_checksum_complete(skb); } -/* Update skb for CHECKSUM_UNNECESSARY when we verified a top level - * checksum or an encapsulated one during GRO. This saves work - * if we fallback to normal path with the packet. - */ static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb) { - if (skb->ip_summed == CHECKSUM_UNNECESSARY) { - if (NAPI_GRO_CB(skb)->encapsulation) - skb->encapsulation = 1; - } else if (skb->ip_summed != CHECKSUM_PARTIAL) { - skb->ip_summed = CHECKSUM_UNNECESSARY; - skb->encapsulation = 0; + if (NAPI_GRO_CB(skb)->csum_cnt > 0) { + /* Consume a checksum from CHECKSUM_UNNECESSARY */ + NAPI_GRO_CB(skb)->csum_cnt--; + } else { + /* Update skb for CHECKSUM_UNNECESSARY and csum_level when we + * verified a new top level checksum or an encapsulated one + * during GRO. This saves work if we fallback to normal path. + */ + __skb_incr_checksum_unnecessary(skb); } } diff --git a/net/core/dev.c b/net/core/dev.c index 26d296c2447c..a6077ef56345 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3962,13 +3962,6 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff gro_list_prepare(napi, skb); - if (skb->ip_summed == CHECKSUM_COMPLETE) { - NAPI_GRO_CB(skb)->csum = skb->csum; - NAPI_GRO_CB(skb)->csum_valid = 1; - } else { - NAPI_GRO_CB(skb)->csum_valid = 0; - } - rcu_read_lock(); list_for_each_entry_rcu(ptype, head, list) { if (ptype->type != type || !ptype->callbacks.gro_receive) @@ -3980,7 +3973,22 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff NAPI_GRO_CB(skb)->flush = 0; NAPI_GRO_CB(skb)->free = 0; NAPI_GRO_CB(skb)->udp_mark = 0; - NAPI_GRO_CB(skb)->encapsulation = 0; + + /* Setup for GRO checksum validation */ + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + NAPI_GRO_CB(skb)->csum = skb->csum; + NAPI_GRO_CB(skb)->csum_valid = 1; + NAPI_GRO_CB(skb)->csum_cnt = 0; + break; + case CHECKSUM_UNNECESSARY: + NAPI_GRO_CB(skb)->csum_cnt = skb->csum_level + 1; + NAPI_GRO_CB(skb)->csum_valid = 0; + break; + default: + NAPI_GRO_CB(skb)->csum_cnt = 0; + NAPI_GRO_CB(skb)->csum_valid = 0; + } pp = ptype->callbacks.gro_receive(&napi->gro_list, skb); break; diff --git a/net/ipv4/gre_offload.c b/net/ipv4/gre_offload.c index d1bd16937d93..a4d7965fb880 100644 --- a/net/ipv4/gre_offload.c +++ b/net/ipv4/gre_offload.c @@ -172,12 +172,9 @@ static struct sk_buff **gre_gro_receive(struct sk_buff **head, } /* Don't bother verifying checksum if we're going to flush anyway. */ - if (greh->flags & GRE_CSUM) { - if (!NAPI_GRO_CB(skb)->flush && - skb_gro_checksum_simple_validate(skb)) + if ((greh->flags & GRE_CSUM) && !NAPI_GRO_CB(skb)->flush && + skb_gro_checksum_simple_validate(skb)) goto out_unlock; - NAPI_GRO_CB(skb)->encapsulation++; - } flush = 0; diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 8ed460e3753c..a6adff98382a 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -238,12 +238,13 @@ struct sk_buff **udp_gro_receive(struct sk_buff **head, struct sk_buff *skb, int flush = 1; if (NAPI_GRO_CB(skb)->udp_mark || - (!skb->encapsulation && !NAPI_GRO_CB(skb)->csum_valid)) + (skb->ip_summed != CHECKSUM_PARTIAL && + NAPI_GRO_CB(skb)->csum_cnt == 0 && + !NAPI_GRO_CB(skb)->csum_valid)) goto out; /* mark that this skb passed once through the udp gro layer */ NAPI_GRO_CB(skb)->udp_mark = 1; - NAPI_GRO_CB(skb)->encapsulation++; rcu_read_lock(); uo_priv = rcu_dereference(udp_offload_base); -- cgit v1.2.3 From 4e00517945bed110f1b8de580cce97626e9ef0b5 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Sun, 31 Aug 2014 21:10:51 +0200 Subject: regulator: max1586: add device-tree support Add device-tree support to max1586. The driver can still be used with the legacy platform data, or the new device-tree way. This work is heavily inspired by the device-tree support of its cousin max8660 driver.
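As an illustration of the legacy path that remains supported, a board file might provide platform data roughly as follows. This is a sketch only: the foo_* names are made up, the constraint values are sample numbers, and the MAX1586_V3 and MAX1586_GAIN_NO_R24 constants are assumed to come from the existing max1586.h header.

#include <linux/kernel.h>
#include <linux/regulator/machine.h>
#include <linux/regulator/max1586.h>

static struct regulator_init_data foo_v3_data = {
	.constraints = {
		.min_uV = 1000000,	/* sample values */
		.max_uV = 1705000,
		.valid_ops_mask = REGULATOR_CHANGE_VOLTAGE,
	},
};

static struct max1586_subdev_data foo_subdevs[] = {
	{
		.id = MAX1586_V3,	/* assumed constant from max1586.h */
		.name = "Output_V3",
		.platform_data = &foo_v3_data,
	},
};

static struct max1586_platform_data foo_pdata = {
	.subdevs = foo_subdevs,
	.num_subdevs = ARRAY_SIZE(foo_subdevs),
	.v3_gain = MAX1586_GAIN_NO_R24,	/* assumed: no external gain resistors */
};

The device-tree path added below derives the same structure from a "regulators" subnode and the "v3-gain" property instead.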
Signed-off-by: Robert Jarzmik Signed-off-by: Mark Brown --- drivers/regulator/max1586.c | 81 ++++++++++++++++++++++++++++++++++++++- include/linux/regulator/max1586.h | 2 +- 2 files changed, 80 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/max1586.c b/drivers/regulator/max1586.c index d23d0577754b..5c04a7159baa 100644 --- a/drivers/regulator/max1586.c +++ b/drivers/regulator/max1586.c @@ -24,6 +24,8 @@ #include #include #include +#include +#include #define MAX1586_V3_MAX_VSEL 31 #define MAX1586_V6_MAX_VSEL 3 @@ -157,13 +159,87 @@ static struct regulator_desc max1586_reg[] = { }, }; +int of_get_max1586_platform_data(struct device *dev, + struct max1586_platform_data *pdata) +{ + struct max1586_subdev_data *sub; + struct of_regulator_match rmatch[ARRAY_SIZE(max1586_reg)]; + struct device_node *np = dev->of_node; + int i, matched; + + if (of_property_read_u32(np, "v3-gain", + &pdata->v3_gain) < 0) { + dev_err(dev, "%s has no 'v3-gain' property\n", np->full_name); + return -EINVAL; + } + + np = of_get_child_by_name(np, "regulators"); + if (!np) { + dev_err(dev, "missing 'regulators' subnode in DT\n"); + return -EINVAL; + } + + for (i = 0; i < ARRAY_SIZE(rmatch); i++) + rmatch[i].name = max1586_reg[i].name; + + matched = of_regulator_match(dev, np, rmatch, ARRAY_SIZE(rmatch)); + of_node_put(np); + /* + * If matched is 0, ie. neither Output_V3 nor Output_V6 have been found, + * return 0, which signals the normal situation where no subregulator is + * available. This is normal because the max1586 doesn't provide any + * readback support, so the subregulators can't report any status + * anyway. If matched < 0, return the error. + */ + if (matched <= 0) + return matched; + + pdata->subdevs = devm_kzalloc(dev, sizeof(struct max1586_subdev_data) * + matched, GFP_KERNEL); + if (!pdata->subdevs) + return -ENOMEM; + + pdata->num_subdevs = matched; + sub = pdata->subdevs; + + for (i = 0; i < matched; i++) { + sub->id = i; + sub->name = rmatch[i].of_node->name; + sub->platform_data = rmatch[i].init_data; + sub++; + } + + return 0; +} + +static const struct of_device_id max1586_of_match[] = { + { .compatible = "maxim,max1586", }, + {}, +}; +MODULE_DEVICE_TABLE(of, max1586_of_match); + static int max1586_pmic_probe(struct i2c_client *client, const struct i2c_device_id *i2c_id) { - struct max1586_platform_data *pdata = dev_get_platdata(&client->dev); + struct max1586_platform_data *pdata, pdata_of; struct regulator_config config = { }; struct max1586_data *max1586; - int i, id; + int i, id, ret; + const struct of_device_id *match; + + pdata = dev_get_platdata(&client->dev); + if (client->dev.of_node && !pdata) { + match = of_match_device(of_match_ptr(max1586_of_match), + &client->dev); + if (!match) { + dev_err(&client->dev, "Error: No device match found\n"); + return -ENODEV; + } + ret = of_get_max1586_platform_data(&client->dev, &pdata_of); + if (ret < 0) + return ret; + pdata = &pdata_of; + } max1586 = devm_kzalloc(&client->dev, sizeof(struct max1586_data), GFP_KERNEL); @@ -229,6 +305,7 @@ static struct i2c_driver max1586_pmic_driver = { .driver = { .name = "max1586", .owner = THIS_MODULE, + .of_match_table = of_match_ptr(max1586_of_match), }, .id_table = max1586_id, }; diff --git a/include/linux/regulator/max1586.h b/include/linux/regulator/max1586.h index de9a7fae20be..cedd0febe882 100644 --- a/include/linux/regulator/max1586.h +++ b/include/linux/regulator/max1586.h @@ -40,7 +40,7 @@ */ struct max1586_subdev_data { int id; - char *name; + const char *name; 
struct regulator_init_data *platform_data; }; -- cgit v1.2.3 From 068765ba7987e73d4381edfe47b70aa121c7155c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 1 Sep 2014 13:47:49 +0200 Subject: PM / sleep: Mechanism for aborting system suspends unconditionally It sometimes may be necessary to abort a system suspend in progress or wake up the system from suspend-to-idle even if the pm_wakeup_event()/pm_stay_awake() mechanism is not enabled. For this purpose, introduce a new global variable pm_abort_suspend and make pm_wakeup_pending() check its value. Also add routines for manipulating that variable. Signed-off-by: Rafael J. Wysocki --- drivers/base/power/wakeup.c | 16 +++++++++++++++- include/linux/suspend.h | 4 ++++ kernel/power/process.c | 1 + 3 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index eb1bd2ecad8b..c2744b30d5d9 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -24,6 +24,9 @@ */ bool events_check_enabled __read_mostly; +/* If set and the system is suspending, terminate the suspend. */ +static bool pm_abort_suspend __read_mostly; + /* * Combined counters of registered wakeup events and wakeup events in progress. * They need to be modified together atomically, so it's better to use one @@ -719,7 +722,18 @@ bool pm_wakeup_pending(void) pm_print_active_wakeup_sources(); } - return ret; + return ret || pm_abort_suspend; +} + +void pm_system_wakeup(void) +{ + pm_abort_suspend = true; + freeze_wake(); +} + +void pm_wakeup_clear(void) +{ + pm_abort_suspend = false; } /** diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 519064e0c943..06a9910827c2 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -371,6 +371,8 @@ extern int unregister_pm_notifier(struct notifier_block *nb); extern bool events_check_enabled; extern bool pm_wakeup_pending(void); +extern void pm_system_wakeup(void); +extern void pm_wakeup_clear(void); extern bool pm_get_wakeup_count(unsigned int *count, bool block); extern bool pm_save_wakeup_count(unsigned int count); extern void pm_wakep_autosleep_enabled(bool set); @@ -418,6 +420,8 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) #define pm_notifier(fn, pri) do { (void)(fn); } while (0) static inline bool pm_wakeup_pending(void) { return false; } +static inline void pm_system_wakeup(void) {} +static inline void pm_wakeup_clear(void) {} static inline void lock_system_sleep(void) {} static inline void unlock_system_sleep(void) {} diff --git a/kernel/power/process.c b/kernel/power/process.c index 4ee194eb524b..7b323221b9ee 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -129,6 +129,7 @@ int freeze_processes(void) if (!pm_freezing) atomic_inc(&system_freezing_cnt); + pm_wakeup_clear(); printk("Freezing user space processes ... "); pm_freezing = true; error = try_to_freeze_tasks(true); -- cgit v1.2.3
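A usage sketch for the new interface (hypothetical driver code, not part of the patch): a wakeup-capable device's interrupt handler can abort a suspend in progress even when pm_wakeup_event()/pm_stay_awake() are not in use.

#include <linux/interrupt.h>
#include <linux/suspend.h>

static irqreturn_t foo_wake_handler(int irq, void *dev_id)
{
	/* Sets pm_abort_suspend and wakes suspend-to-idle, so the
	 * next pm_wakeup_pending() check terminates the transition.
	 */
	pm_system_wakeup();
	return IRQ_HANDLED;
}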
From cab303be91dc47942bc25de33dc1140123540800 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 28 Aug 2014 11:44:31 +0200 Subject: genirq: Add sanity checks for PM options on shared interrupt lines Account the IRQF_NO_SUSPEND and IRQF_RESUME_EARLY actions on shared interrupt lines and yell loudly if there is a mismatch. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki --- include/linux/irqdesc.h | 10 ++++++++++ kernel/irq/internals.h | 10 ++++++++++ kernel/irq/manage.c | 4 ++++ kernel/irq/pm.c | 36 ++++++++++++++++++++++++++++++++++++ 4 files changed, 60 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 472c021a2d4f..cb1a31e448ae 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -36,6 +36,11 @@ struct irq_desc; * @threads_oneshot: bitfield to handle shared oneshot threads * @threads_active: number of irqaction threads currently running * @wait_for_threads: wait queue for sync_irq to wait for threaded handlers + * @nr_actions: number of installed actions on this descriptor + * @no_suspend_depth: number of irqactions on an irq descriptor with + * IRQF_NO_SUSPEND set + * @force_resume_depth: number of irqactions on an irq descriptor with + * IRQF_FORCE_RESUME set * @dir: /proc/irq/ procfs entry * @name: flow handler name for /proc/interrupts output */ @@ -68,6 +73,11 @@ struct irq_desc { unsigned long threads_oneshot; atomic_t threads_active; wait_queue_head_t wait_for_threads; +#ifdef CONFIG_PM_SLEEP + unsigned int nr_actions; + unsigned int no_suspend_depth; + unsigned int force_resume_depth; +#endif #ifdef CONFIG_PROC_FS struct proc_dir_entry *dir; #endif diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index af2821178900..c402502a5111 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -194,3 +194,13 @@ static inline void kstat_incr_irqs_this_cpu(unsigned int irq, struct irq_desc *d __this_cpu_inc(*desc->kstat_irqs); __this_cpu_inc(kstat.irqs_sum); } + +#ifdef CONFIG_PM_SLEEP +void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action); +void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action); +#else +static inline void +irq_pm_install_action(struct irq_desc *desc, struct irqaction *action) { } +static inline void +irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action) { } +#endif diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index fa564e8db996..0a9104b4608b 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -1200,6 +1200,8 @@ __setup_irq(unsigned int irq, struct irq_desc *desc, struct irqaction *new) new->irq = irq; *old_ptr = new; + irq_pm_install_action(desc, new); + /* Reset broken irq detection when installing new handler */ desc->irq_count = 0; desc->irqs_unhandled = 0; @@ -1318,6 +1320,8 @@ static struct irqaction *__free_irq(unsigned int irq, void *dev_id) /* Found it - now remove it from the list of entries: */ *action_ptr = action->next; + irq_pm_remove_action(desc, action); + /* If this was the last handler, shut down the IRQ line: */ if (!desc->action) { irq_shutdown(desc); diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c index b84141dcee5e..1b1b67a73218 100644 --- a/kernel/irq/pm.c +++ b/kernel/irq/pm.c @@ -13,6 +13,42 @@ #include "internals.h" +/* + * Called from __setup_irq() with desc->lock held after @action has + * been installed in the action chain.
+ */ +void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action) +{ + desc->nr_actions++; + + if (action->flags & IRQF_FORCE_RESUME) + desc->force_resume_depth++; + + WARN_ON_ONCE(desc->force_resume_depth && + desc->force_resume_depth != desc->nr_actions); + + if (action->flags & IRQF_NO_SUSPEND) + desc->no_suspend_depth++; + + WARN_ON_ONCE(desc->no_suspend_depth && + desc->no_suspend_depth != desc->nr_actions); +} + +/* + * Called from __free_irq() with desc->lock held after @action has + * been removed from the action chain. + */ +void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action) +{ + desc->nr_actions--; + + if (action->flags & IRQF_FORCE_RESUME) + desc->force_resume_depth--; + + if (action->flags & IRQF_NO_SUSPEND) + desc->no_suspend_depth--; +} + static void suspend_device_irq(struct irq_desc *desc, int irq) { if (!desc->action || (desc->action->flags & IRQF_NO_SUSPEND)) -- cgit v1.2.3 From b76f16748fa61801b1a1fd3ffb6f25ee228a35e0 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 29 Aug 2014 13:54:09 +0200 Subject: genirq: Mark wakeup sources as armed on suspend This allows us to utilize this information in the irq_may_run() check without adding another conditional to the fast path. Signed-off-by: Thomas Gleixner Signed-off-by: Rafael J. Wysocki --- include/linux/irq.h | 8 ++++++++ kernel/irq/pm.c | 5 +++++ 2 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 62af59242ddc..03f48d936f66 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -173,6 +173,7 @@ struct irq_data { * IRQD_IRQ_DISABLED - Disabled state of the interrupt * IRQD_IRQ_MASKED - Masked state of the interrupt * IRQD_IRQ_INPROGRESS - In progress state of the interrupt + * IRQD_WAKEUP_ARMED - Wakeup mode armed */ enum { IRQD_TRIGGER_MASK = 0xf, @@ -186,6 +187,7 @@ enum { IRQD_IRQ_DISABLED = (1 << 16), IRQD_IRQ_MASKED = (1 << 17), IRQD_IRQ_INPROGRESS = (1 << 18), + IRQD_WAKEUP_ARMED = (1 << 19), }; static inline bool irqd_is_setaffinity_pending(struct irq_data *d) @@ -257,6 +259,12 @@ static inline bool irqd_irq_inprogress(struct irq_data *d) return d->state_use_accessors & IRQD_IRQ_INPROGRESS; } +static inline bool irqd_is_wakeup_armed(struct irq_data *d) +{ + return d->state_use_accessors & IRQD_WAKEUP_ARMED; +} + + /* * Functions for chained handlers which can be enabled/disabled by the * standard disable_irq/enable_irq calls. Must be called with diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c index cf0ce0163db9..766930eaeed9 100644 --- a/kernel/irq/pm.c +++ b/kernel/irq/pm.c @@ -54,6 +54,9 @@ static bool suspend_device_irq(struct irq_desc *desc, int irq) if (!desc->action || desc->no_suspend_depth) return false; + if (irqd_is_wakeup_set(&desc->irq_data)) + irqd_set(&desc->irq_data, IRQD_WAKEUP_ARMED); + desc->istate |= IRQS_SUSPENDED; __disable_irq(desc, irq); @@ -101,6 +104,8 @@ EXPORT_SYMBOL_GPL(suspend_device_irqs); static void resume_irq(struct irq_desc *desc, int irq) { + irqd_clear(&desc->irq_data, IRQD_WAKEUP_ARMED); + if (desc->istate & IRQS_SUSPENDED) goto resume; -- cgit v1.2.3 From 9ce7a25849e80cfb264f4995f832b932c1987e1a Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 29 Aug 2014 14:00:16 +0200 Subject: genirq: Simplify wakeup mechanism Currently we suspend wakeup interrupts by lazily disabling them and check later whether the interrupt has fired, but that's not sufficient for suspend to idle as there is no way to check that once we have transitioned into the CPU idle state.
So we change the mechanism in the following way: 1) Leave the wakeup interrupts enabled across suspend 2) Add a check to irq_may_run() which is called at the beginning of each flow handler whether the interrupt is an armed wakeup source. This check is basically free as it just extends the existing check for IRQD_IRQ_INPROGRESS. So no new conditional in the hot path. If the IRQD_WAKEUP_ARMED flag is set, then the interrupt is disabled, marked as pending/suspended and the pm core is notified about the wakeup event. Signed-off-by: Thomas Gleixner [ rjw: syscore.c and put irq_pm_check_wakeup() into pm.c ] Signed-off-by: Rafael J. Wysocki --- drivers/base/syscore.c | 7 +++--- include/linux/interrupt.h | 5 ----- kernel/irq/chip.c | 20 ++++++++++++++++- kernel/irq/internals.h | 2 ++ kernel/irq/pm.c | 55 +++++++++++++++++++++++++---------------------- 5 files changed, 53 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/syscore.c b/drivers/base/syscore.c index dbb8350ea8dc..8d98a329f6ea 100644 --- a/drivers/base/syscore.c +++ b/drivers/base/syscore.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include static LIST_HEAD(syscore_ops_list); @@ -54,9 +54,8 @@ int syscore_suspend(void) pr_debug("Checking wakeup interrupts\n"); /* Return error code if there are any wakeup interrupts pending. */ - ret = check_wakeup_irqs(); - if (ret) - return ret; + if (pm_wakeup_pending()) + return -EBUSY; WARN_ONCE(!irqs_disabled(), "Interrupts enabled before system core suspend.\n"); diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 698ad053d064..69517a24bc50 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -193,11 +193,6 @@ extern void irq_wake_thread(unsigned int irq, void *dev_id); /* The following three functions are for the core kernel use only. */ extern void suspend_device_irqs(void); extern void resume_device_irqs(void); -#ifdef CONFIG_PM_SLEEP -extern int check_wakeup_irqs(void); -#else -static inline int check_wakeup_irqs(void) { return 0; } -#endif /** * struct irq_affinity_notify - context for notification of IRQ affinity changes diff --git a/kernel/irq/chip.c b/kernel/irq/chip.c index 6baf86085571..e7917ff8a486 100644 --- a/kernel/irq/chip.c +++ b/kernel/irq/chip.c @@ -344,8 +344,26 @@ static bool irq_check_poll(struct irq_desc *desc) static bool irq_may_run(struct irq_desc *desc) { - if (!irqd_irq_inprogress(&desc->irq_data)) + unsigned int mask = IRQD_IRQ_INPROGRESS | IRQD_WAKEUP_ARMED; + + /* + * If the interrupt is not in progress and is not an armed + * wakeup interrupt, proceed. + */ + if (!irqd_has_set(&desc->irq_data, mask)) return true; + + /* + * If the interrupt is an armed wakeup source, mark it pending + * and suspended, disable it and notify the pm core about the + * event. + */ + if (irq_pm_check_wakeup(desc)) + return false; + + /* + * Handle a potential concurrent poll on a different core. 
+ */ return irq_check_poll(desc); } diff --git a/kernel/irq/internals.h b/kernel/irq/internals.h index c402502a5111..4332d766619d 100644 --- a/kernel/irq/internals.h +++ b/kernel/irq/internals.h @@ -196,9 +196,11 @@ static inline void kstat_incr_irqs_this_cpu(unsigned int irq, struct irq_desc *d } #ifdef CONFIG_PM_SLEEP +bool irq_pm_check_wakeup(struct irq_desc *desc); void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action); void irq_pm_remove_action(struct irq_desc *desc, struct irqaction *action); #else +static inline bool irq_pm_check_wakeup(struct irq_desc *desc) { return false; } static inline void irq_pm_install_action(struct irq_desc *desc, struct irqaction *action) { } static inline void diff --git a/kernel/irq/pm.c b/kernel/irq/pm.c index 766930eaeed9..3ca532592704 100644 --- a/kernel/irq/pm.c +++ b/kernel/irq/pm.c @@ -9,10 +9,24 @@ #include #include #include +#include #include #include "internals.h" +bool irq_pm_check_wakeup(struct irq_desc *desc) +{ + if (irqd_is_wakeup_armed(&desc->irq_data)) { + irqd_clear(&desc->irq_data, IRQD_WAKEUP_ARMED); + desc->istate |= IRQS_SUSPENDED | IRQS_PENDING; + desc->depth++; + irq_disable(desc); + pm_system_wakeup(); + return true; + } + return false; +} + /* * Called from __setup_irq() with desc->lock held after @action has * been installed in the action chain. @@ -54,8 +68,16 @@ static bool suspend_device_irq(struct irq_desc *desc, int irq) if (!desc->action || desc->no_suspend_depth) return false; - if (irqd_is_wakeup_set(&desc->irq_data)) + if (irqd_is_wakeup_set(&desc->irq_data)) { irqd_set(&desc->irq_data, IRQD_WAKEUP_ARMED); + /* + * We return true here to force the caller to issue + * synchronize_irq(). We need to make sure that the + * IRQD_WAKEUP_ARMED is visible before we return from + * suspend_device_irqs(). + */ + return true; + } desc->istate |= IRQS_SUSPENDED; __disable_irq(desc, irq); @@ -79,9 +101,13 @@ static bool suspend_device_irq(struct irq_desc *desc, int irq) * for this purpose. * * So we disable all interrupts and mark them IRQS_SUSPENDED except - * for those which are unused and those which are marked as not + * for those which are unused, those which are marked as not * suspendable via an interrupt request with the flag IRQF_NO_SUSPEND - * set. + * set and those which are marked as active wakeup sources. + * + * The active wakeup sources are handled by the flow handler entry + * code which checks for the IRQD_WAKEUP_ARMED flag, suspends the + * interrupt and notifies the pm core about the wakeup. */ void suspend_device_irqs(void) { @@ -173,26 +199,3 @@ void resume_device_irqs(void) resume_irqs(false); } EXPORT_SYMBOL_GPL(resume_device_irqs); - -/** - * check_wakeup_irqs - check if any wake-up interrupts are pending - */ -int check_wakeup_irqs(void) -{ - struct irq_desc *desc; - int irq; - - for_each_irq_desc(irq, desc) { - /* - * Only interrupts which are marked as wakeup source - * and have not been disabled before the suspend check - * can abort suspend. - */ - if (irqd_is_wakeup_set(&desc->irq_data)) { - if (desc->depth == 1 && desc->istate & IRQS_PENDING) - return -EBUSY; - } - } - - return 0; -} -- cgit v1.2.3 From 10b3ad8c21bb4b135768c30dd4c51a1c744da699 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 29 Aug 2014 21:07:24 -0700 Subject: net: Do txq_trans_update() in netdev_start_xmit() That way we don't have to audit every call site to make sure it is doing this properly. Signed-off-by: David S. 
Miller --- drivers/net/wan/dlci.c | 6 ++++-- include/linux/netdevice.h | 10 ++++++++-- net/core/dev.c | 7 ++----- net/core/netpoll.c | 4 +--- net/core/pktgen.c | 3 +-- net/packet/af_packet.c | 7 ++----- net/sched/sch_teql.c | 3 +-- 7 files changed, 19 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wan/dlci.c b/drivers/net/wan/dlci.c index 81b22a180aad..6427e8283419 100644 --- a/drivers/net/wan/dlci.c +++ b/drivers/net/wan/dlci.c @@ -192,8 +192,10 @@ static netdev_tx_t dlci_transmit(struct sk_buff *skb, struct net_device *dev) { struct dlci_local *dlp = netdev_priv(dev); - if (skb) - netdev_start_xmit(skb, dlp->slave); + if (skb) { + struct netdev_queue *txq = skb_get_tx_queue(dev, skb); + netdev_start_xmit(skb, dlp->slave, txq); + } return NETDEV_TX_OK; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 456eb1fe51e8..16171802ea7d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3437,11 +3437,17 @@ static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops, return ops->ndo_start_xmit(skb, dev); } -static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev) +static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev, + struct netdev_queue *txq) { const struct net_device_ops *ops = dev->netdev_ops; + int rc; - return __netdev_start_xmit(ops, skb, dev); + rc = __netdev_start_xmit(ops, skb, dev); + if (rc == NETDEV_TX_OK) + txq_trans_update(txq); + + return rc; } int netdev_class_create_file_ns(struct class_attribute *class_attr, diff --git a/net/core/dev.c b/net/core/dev.c index a6077ef56345..6392adaaa22f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2666,10 +2666,8 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, skb_len = skb->len; trace_net_dev_start_xmit(skb, dev); - rc = netdev_start_xmit(skb, dev); + rc = netdev_start_xmit(skb, dev, txq); trace_net_dev_xmit(skb, rc, dev, skb_len); - if (rc == NETDEV_TX_OK) - txq_trans_update(txq); return rc; } @@ -2685,7 +2683,7 @@ gso: skb_len = nskb->len; trace_net_dev_start_xmit(nskb, dev); - rc = netdev_start_xmit(nskb, dev); + rc = netdev_start_xmit(nskb, dev, txq); trace_net_dev_xmit(nskb, rc, dev, skb_len); if (unlikely(rc != NETDEV_TX_OK)) { if (rc & ~NETDEV_TX_MASK) @@ -2694,7 +2692,6 @@ gso: skb->next = nskb; return rc; } - txq_trans_update(txq); if (unlikely(netif_xmit_stopped(txq) && skb->next)) return NETDEV_TX_BUSY; } while (skb->next); diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 12b1df976562..05bc57edaa81 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -91,9 +91,7 @@ static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev, skb->vlan_tci = 0; } - status = netdev_start_xmit(skb, dev); - if (status == NETDEV_TX_OK) - txq_trans_update(txq); + status = netdev_start_xmit(skb, dev, txq); out: return status; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index d81b540096c3..34bd2ff9f121 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3335,11 +3335,10 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) goto unlock; } atomic_inc(&(pkt_dev->skb->users)); - ret = netdev_start_xmit(pkt_dev->skb, odev); + ret = netdev_start_xmit(pkt_dev->skb, odev, txq); switch (ret) { case NETDEV_TX_OK: - txq_trans_update(txq); pkt_dev->last_ok = 1; pkt_dev->sofar++; pkt_dev->seq_num++; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index b7a7f5a721bd..fe305a05a8fc 100644 --- a/net/packet/af_packet.c 
+++ b/net/packet/af_packet.c @@ -258,11 +258,8 @@ static int packet_direct_xmit(struct sk_buff *skb) local_bh_disable(); HARD_TX_LOCK(dev, txq, smp_processor_id()); - if (!netif_xmit_frozen_or_drv_stopped(txq)) { - ret = netdev_start_xmit(skb, dev); - if (ret == NETDEV_TX_OK) - txq_trans_update(txq); - } + if (!netif_xmit_frozen_or_drv_stopped(txq)) + ret = netdev_start_xmit(skb, dev, txq); HARD_TX_UNLOCK(dev, txq); local_bh_enable(); diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 64cd93ca8104..193dc2cba1ec 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -316,8 +316,7 @@ restart: unsigned int length = qdisc_pkt_len(skb); if (!netif_xmit_frozen_or_stopped(slave_txq) && - netdev_start_xmit(skb, slave) == NETDEV_TX_OK) { - txq_trans_update(slave_txq); + netdev_start_xmit(skb, slave, slave_txq) == NETDEV_TX_OK) { __netif_tx_unlock(slave_txq); master->slaves = NEXT_SLAVE(q); netif_wake_queue(dev); -- cgit v1.2.3 From fa2dbdc253c2aee2a760c64de454cb62469ec11d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 29 Aug 2014 21:55:22 -0700 Subject: net: Pass a "more" indication down into netdev_start_xmit() code paths. For now it will always be false. Signed-off-by: David S. Miller --- drivers/net/wan/dlci.c | 2 +- include/linux/netdevice.h | 9 +++++---- net/atm/mpc.c | 2 +- net/core/dev.c | 2 +- net/core/netpoll.c | 2 +- net/core/pktgen.c | 2 +- net/packet/af_packet.c | 2 +- net/sched/sch_teql.c | 3 ++- 8 files changed, 13 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/wan/dlci.c b/drivers/net/wan/dlci.c index 6427e8283419..ae6ecf401189 100644 --- a/drivers/net/wan/dlci.c +++ b/drivers/net/wan/dlci.c @@ -194,7 +194,7 @@ static netdev_tx_t dlci_transmit(struct sk_buff *skb, struct net_device *dev) if (skb) { struct netdev_queue *txq = skb_get_tx_queue(dev, skb); - netdev_start_xmit(skb, dlp->slave, txq); + netdev_start_xmit(skb, dlp->slave, txq, false); } return NETDEV_TX_OK; } diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 16171802ea7d..5050218c5b7f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3431,19 +3431,20 @@ int __init dev_proc_init(void); #endif static inline netdev_tx_t __netdev_start_xmit(const struct net_device_ops *ops, - struct sk_buff *skb, struct net_device *dev) + struct sk_buff *skb, struct net_device *dev, + bool more) { - skb->xmit_more = 0; + skb->xmit_more = more ? 
1 : 0; return ops->ndo_start_xmit(skb, dev); } static inline netdev_tx_t netdev_start_xmit(struct sk_buff *skb, struct net_device *dev, - struct netdev_queue *txq) + struct netdev_queue *txq, bool more) { const struct net_device_ops *ops = dev->netdev_ops; int rc; - rc = __netdev_start_xmit(ops, skb, dev); + rc = __netdev_start_xmit(ops, skb, dev, more); if (rc == NETDEV_TX_OK) txq_trans_update(txq); diff --git a/net/atm/mpc.c b/net/atm/mpc.c index d662da161e5a..0e982222d425 100644 --- a/net/atm/mpc.c +++ b/net/atm/mpc.c @@ -599,7 +599,7 @@ static netdev_tx_t mpc_send_packet(struct sk_buff *skb, } non_ip: - return __netdev_start_xmit(mpc->old_ops, skb, dev); + return __netdev_start_xmit(mpc->old_ops, skb, dev, false); } static int atm_mpoa_vcc_attach(struct atm_vcc *vcc, void __user *arg) diff --git a/net/core/dev.c b/net/core/dev.c index ab7bb809711e..f0ed5a611a97 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2610,7 +2610,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev, len = skb->len; trace_net_dev_start_xmit(skb, dev); - rc = netdev_start_xmit(skb, dev, txq); + rc = netdev_start_xmit(skb, dev, txq, false); trace_net_dev_xmit(skb, rc, dev, len); return rc; diff --git a/net/core/netpoll.c b/net/core/netpoll.c index 05bc57edaa81..e6645b4f330a 100644 --- a/net/core/netpoll.c +++ b/net/core/netpoll.c @@ -91,7 +91,7 @@ static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev, skb->vlan_tci = 0; } - status = netdev_start_xmit(skb, dev, txq); + status = netdev_start_xmit(skb, dev, txq, false); out: return status; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 34bd2ff9f121..5b36a9428c59 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -3335,7 +3335,7 @@ static void pktgen_xmit(struct pktgen_dev *pkt_dev) goto unlock; } atomic_inc(&(pkt_dev->skb->users)); - ret = netdev_start_xmit(pkt_dev->skb, odev, txq); + ret = netdev_start_xmit(pkt_dev->skb, odev, txq, false); switch (ret) { case NETDEV_TX_OK: diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index fe305a05a8fc..87d20f48ff06 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -259,7 +259,7 @@ static int packet_direct_xmit(struct sk_buff *skb) HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_drv_stopped(txq)) - ret = netdev_start_xmit(skb, dev, txq); + ret = netdev_start_xmit(skb, dev, txq, false); HARD_TX_UNLOCK(dev, txq); local_bh_enable(); diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index 193dc2cba1ec..aaa8d03ed054 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -316,7 +316,8 @@ restart: unsigned int length = qdisc_pkt_len(skb); if (!netif_xmit_frozen_or_stopped(slave_txq) && - netdev_start_xmit(skb, slave, slave_txq) == NETDEV_TX_OK) { + netdev_start_xmit(skb, slave, slave_txq, false) == + NETDEV_TX_OK) { __netif_tx_unlock(slave_txq); master->slaves = NEXT_SLAVE(q); netif_wake_queue(dev); -- cgit v1.2.3 From 50cbe9ab5f8d92d2d4a327b56e96559d8f63a1fa Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 30 Aug 2014 19:13:51 -0700 Subject: net: Validate xmit SKBs right when we pull them out of the qdisc. Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 1 + net/core/dev.c | 6 +----- net/sched/sch_generic.c | 5 ++++- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5050218c5b7f..47c49ba2dcf4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2827,6 +2827,7 @@ int dev_set_mac_address(struct net_device *, struct sockaddr *); int dev_change_carrier(struct net_device *, bool new_carrier); int dev_get_phys_port_id(struct net_device *dev, struct netdev_phys_port_id *ppid); +struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev); int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); diff --git a/net/core/dev.c b/net/core/dev.c index 704a5434f77d..75bc5b068a13 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2656,7 +2656,7 @@ struct sk_buff *validate_xmit_vlan(struct sk_buff *skb, netdev_features_t featur return skb; } -static struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev) +struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev) { netdev_features_t features; @@ -2719,10 +2719,6 @@ int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, { int rc = NETDEV_TX_OK; - skb = validate_xmit_skb(skb, dev); - if (!skb) - return rc; - if (likely(!skb->next)) return xmit_one(skb, dev, txq, false); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 05b3f5d104af..f178798a5836 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -70,8 +70,11 @@ static inline struct sk_buff *dequeue_skb(struct Qdisc *q) } else skb = NULL; } else { - if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq)) + if (!(q->flags & TCQ_F_ONETXQUEUE) || !netif_xmit_frozen_or_stopped(txq)) { skb = q->dequeue(q); + if (skb) + skb = validate_xmit_skb(skb, qdisc_dev(q)); + } } return skb; -- cgit v1.2.3 From ce93718fb7cdbc064c3000ff59e4d3200bdfa744 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 30 Aug 2014 19:22:20 -0700 Subject: net: Don't keep around original SKB when we software segment GSO frames. Just maintain the list properly by returning the head of the remaining SKB list from dev_hard_start_xmit(). Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 4 +-- net/core/dev.c | 79 +++++++++-------------------------------------- net/sched/sch_generic.c | 2 +- 3 files changed, 17 insertions(+), 68 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 47c49ba2dcf4..202c25a9aadf 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2828,8 +2828,8 @@ int dev_change_carrier(struct net_device *, bool new_carrier); int dev_get_phys_port_id(struct net_device *dev, struct netdev_phys_port_id *ppid); struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev); -int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, - struct netdev_queue *txq); +struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, + struct netdev_queue *txq, int *ret); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); bool is_skb_forwardable(struct net_device *dev, struct sk_buff *skb); diff --git a/net/core/dev.c b/net/core/dev.c index 75bc5b068a13..c89da4f306b1 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2485,52 +2485,6 @@ static int illegal_highdma(struct net_device *dev, struct sk_buff *skb) return 0; } -struct dev_gso_cb { - void (*destructor)(struct sk_buff *skb); -}; - -#define DEV_GSO_CB(skb) ((struct dev_gso_cb *)(skb)->cb) - -static void dev_gso_skb_destructor(struct sk_buff *skb) -{ - struct dev_gso_cb *cb; - - kfree_skb_list(skb->next); - skb->next = NULL; - - cb = DEV_GSO_CB(skb); - if (cb->destructor) - cb->destructor(skb); -} - -/** - * dev_gso_segment - Perform emulated hardware segmentation on skb. - * @skb: buffer to segment - * @features: device features as applicable to this skb - * - * This function segments the given skb and stores the list of segments - * in skb->next. - */ -static int dev_gso_segment(struct sk_buff *skb, netdev_features_t features) -{ - struct sk_buff *segs; - - segs = skb_gso_segment(skb, features); - - /* Verifying header integrity only. */ - if (!segs) - return 0; - - if (IS_ERR(segs)) - return PTR_ERR(segs); - - skb->next = segs; - DEV_GSO_CB(skb)->destructor = skb->destructor; - skb->destructor = dev_gso_skb_destructor; - - return 0; -} - /* If MPLS offload request, verify we are testing hardware MPLS features * instead of standard features for the netdev. 
*/ @@ -2682,8 +2636,13 @@ struct sk_buff *validate_xmit_skb(struct sk_buff *skb, struct net_device *dev) features &= dev->hw_enc_features; if (netif_needs_gso(skb, features)) { - if (unlikely(dev_gso_segment(skb, features))) - goto out_kfree_skb; + struct sk_buff *segs; + + segs = skb_gso_segment(skb, features); + kfree_skb(skb); + if (IS_ERR(segs)) + segs = NULL; + skb = segs; } else { if (skb_needs_linearize(skb, features) && __skb_linearize(skb)) @@ -2714,26 +2673,16 @@ out_null: return NULL; } -int dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, - struct netdev_queue *txq) +struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, + struct netdev_queue *txq, int *ret) { - int rc = NETDEV_TX_OK; - - if (likely(!skb->next)) - return xmit_one(skb, dev, txq, false); - - skb->next = xmit_list(skb->next, dev, txq, &rc); - if (likely(skb->next == NULL)) { - skb->destructor = DEV_GSO_CB(skb)->destructor; - consume_skb(skb); - return rc; + if (likely(!skb->next)) { + *ret = xmit_one(skb, dev, txq, false); + return skb; } - kfree_skb(skb); - - return rc; + return xmit_list(skb, dev, txq, ret); } -EXPORT_SYMBOL_GPL(dev_hard_start_xmit); static void qdisc_pkt_len_init(struct sk_buff *skb) { @@ -2945,7 +2894,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, void *accel_priv) if (!netif_xmit_stopped(txq)) { __this_cpu_inc(xmit_recursion); - rc = dev_hard_start_xmit(skb, dev, txq); + skb = dev_hard_start_xmit(skb, dev, txq, &rc); __this_cpu_dec(xmit_recursion); if (dev_xmit_complete(rc)) { HARD_TX_UNLOCK(dev, txq); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index f178798a5836..a8bf9f9928bd 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -129,7 +129,7 @@ int sch_direct_xmit(struct sk_buff *skb, struct Qdisc *q, HARD_TX_LOCK(dev, txq, smp_processor_id()); if (!netif_xmit_frozen_or_stopped(txq)) - ret = dev_hard_start_xmit(skb, dev, txq); + skb = dev_hard_start_xmit(skb, dev, txq, &ret); HARD_TX_UNLOCK(dev, txq); -- cgit v1.2.3 From 5a21232983aa7acfe7fd26170832a9e0a4a7b4ae Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 31 Aug 2014 15:12:41 -0700 Subject: net: Support for csum_bad in skbuff This flag indicates that an invalid checksum was detected in the packet. __skb_mark_checksum_bad helper function was added to set this. Checksums can be marked bad from a driver or the GRO path (the latter is implemented in this patch). csum_bad is checked in __skb_checksum_validate_complete (i.e. calling that when ip_summed == CHECKSUM_NONE). csum_bad works in conjunction with ip_summed value. In the case that ip_summed is CHECKSUM_NONE and csum_bad is set, this implies that the first (or next) checksum encountered in the packet is bad. When ip_summed is CHECKSUM_UNNECESSARY, the first checksum after the last one validated is bad. For example, if ip_summed == CHECKSUM_UNNECESSARY, csum_level == 1, and csum_bad is set-- then the third checksum in the packet is bad. In the normal path, the packet will be dropped when processing the protocol layer of the bad checksum: __skb_decr_checksum_unnecessary called twice for the good checksums changing ip_summed to CHECKSUM_NONE so that __skb_checksum_validate_complete is called to validate the third checksum and that will fail since csum_bad is set. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 4 +++- include/linux/skbuff.h | 21 ++++++++++++++++++++- net/core/dev.c | 2 +- 3 files changed, 24 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 202c25a9aadf..a0ab6d9d400a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2216,7 +2216,9 @@ static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb) if (__skb_gro_checksum_validate_needed(skb, zero_okay, check)) \ __ret = __skb_gro_checksum_validate_complete(skb, \ compute_pseudo(skb, proto)); \ - if (!__ret) \ + if (__ret) \ + __skb_mark_checksum_bad(skb); \ + else \ skb_gro_incr_csum_unnecessary(skb); \ __ret; \ }) diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c93b5859a772..23710a243439 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -617,7 +617,8 @@ struct sk_buff { kmemcheck_bitfield_begin(flags3); __u8 csum_level:2; - /* 14 bit hole */ + __u8 csum_bad:1; + /* 13 bit hole */ kmemcheck_bitfield_end(flags3); __be16 inner_protocol; @@ -2825,6 +2826,21 @@ static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb) } } +static inline void __skb_mark_checksum_bad(struct sk_buff *skb) +{ + /* Mark current checksum as bad (typically called from GRO + * path). In the case that ip_summed is CHECKSUM_NONE + * this must be the first checksum encountered in the packet. + * When ip_summed is CHECKSUM_UNNECESSARY, this is the first + * checksum after the last one validated. For UDP, a zero + * checksum can not be marked as bad. + */ + + if (skb->ip_summed == CHECKSUM_NONE || + skb->ip_summed == CHECKSUM_UNNECESSARY) + skb->csum_bad = 1; +} + /* Check if we need to perform checksum complete validation. * * Returns true if checksum complete is needed, false otherwise @@ -2866,6 +2882,9 @@ static inline __sum16 __skb_checksum_validate_complete(struct sk_buff *skb, skb->csum_valid = 1; return 0; } + } else if (skb->csum_bad) { + /* ip_summed == CHECKSUM_NONE in this case */ + return 1; } skb->csum = psum; diff --git a/net/core/dev.c b/net/core/dev.c index 6857d57aa294..3774afc3bebf 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -3918,7 +3918,7 @@ static enum gro_result dev_gro_receive(struct napi_struct *napi, struct sk_buff if (!(skb->dev->features & NETIF_F_GRO)) goto normal; - if (skb_is_gso(skb) || skb_has_frag_list(skb)) + if (skb_is_gso(skb) || skb_has_frag_list(skb) || skb->csum_bad) goto normal; gro_list_prepare(napi, skb); -- cgit v1.2.3 From d96535a17dbbafd567961d14c08c0984ddda9c3c Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 31 Aug 2014 15:12:42 -0700 Subject: net: Infrastructure for checksum unnecessary conversions For normal path, added skb_checksum_try_convert which is called to attempt to convert CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE. The primary condition to allow this is that ip_summed is CHECKSUM_NONE and csum_valid is true, which will be the state after consuming a CHECKSUM_UNNECESSARY. For GRO path, added skb_gro_checksum_try_convert which is the GRO analogue of skb_checksum_try_convert. The primary condition to allow this is that NAPI_GRO_CB(skb)->csum_cnt == 0 and NAPI_GRO_CB(skb)->csum_valid is set. This implies that we have consumed all available CHECKSUM_UNNECESSARY checksums in the GRO path. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 20 ++++++++++++++++++++ include/linux/skbuff.h | 20 ++++++++++++++++++++ 2 files changed, 40 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index a0ab6d9d400a..5be20a7bbb0d 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2233,6 +2233,26 @@ static inline void skb_gro_incr_csum_unnecessary(struct sk_buff *skb) #define skb_gro_checksum_simple_validate(skb) \ __skb_gro_checksum_validate(skb, 0, false, 0, null_compute_pseudo) +static inline bool __skb_gro_checksum_convert_check(struct sk_buff *skb) +{ + return (NAPI_GRO_CB(skb)->csum_cnt == 0 && + !NAPI_GRO_CB(skb)->csum_valid); +} + +static inline void __skb_gro_checksum_convert(struct sk_buff *skb, + __sum16 check, __wsum pseudo) +{ + NAPI_GRO_CB(skb)->csum = ~pseudo; + NAPI_GRO_CB(skb)->csum_valid = 1; +} + +#define skb_gro_checksum_try_convert(skb, proto, check, compute_pseudo) \ +do { \ + if (__skb_gro_checksum_convert_check(skb)) \ + __skb_gro_checksum_convert(skb, check, \ + compute_pseudo(skb, proto)); \ +} while (0) + static inline int dev_hard_header(struct sk_buff *skb, struct net_device *dev, unsigned short type, const void *daddr, const void *saddr, diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 23710a243439..02529fcad1ac 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2942,6 +2942,26 @@ static inline __wsum null_compute_pseudo(struct sk_buff *skb, int proto) #define skb_checksum_simple_validate(skb) \ __skb_checksum_validate(skb, 0, true, false, 0, null_compute_pseudo) +static inline bool __skb_checksum_convert_check(struct sk_buff *skb) +{ + return (skb->ip_summed == CHECKSUM_NONE && + skb->csum_valid && !skb->csum_bad); +} + +static inline void __skb_checksum_convert(struct sk_buff *skb, + __sum16 check, __wsum pseudo) +{ + skb->csum = ~pseudo; + skb->ip_summed = CHECKSUM_COMPLETE; +} + +#define skb_checksum_try_convert(skb, proto, check, compute_pseudo) \ +do { \ + if (__skb_checksum_convert_check(skb)) \ + __skb_checksum_convert(skb, check, \ + compute_pseudo(skb, proto)); \ +} while (0) + #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) void nf_conntrack_destroy(struct nf_conntrack *nfct); static inline void nf_conntrack_put(struct nf_conntrack *nfct) -- cgit v1.2.3 From 2abb7cdc0dc84e99b76ef983a1ae1978922aa9b3 Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 31 Aug 2014 15:12:43 -0700 Subject: udp: Add support for doing checksum unnecessary conversion Add support for doing CHECKSUM_UNNECESSARY to CHECKSUM_COMPLETE conversion in UDP tunneling path. In the normal UDP path, we call skb_checksum_try_convert after locating the UDP socket. The check is that checksum conversion is enabled for the socket (new flag in UDP socket) and that checksum field is non-zero. In the UDP GRO path, we call skb_gro_checksum_try_convert after checksum is validated and checksum field is non-zero. Since this is already in GRO we assume that checksum conversion is always wanted. Signed-off-by: Tom Herbert Signed-off-by: David S. 
Miller --- include/linux/udp.h | 16 +++++++++++++++- net/ipv4/udp.c | 4 ++++ net/ipv4/udp_offload.c | 25 +++++++++++++++++-------- net/ipv6/udp.c | 4 ++++ net/ipv6/udp_offload.c | 24 +++++++++++++++++------- 5 files changed, 57 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 247cfdcc4b08..ee3277593222 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -49,7 +49,11 @@ struct udp_sock { unsigned int corkflag; /* Cork is required */ __u8 encap_type; /* Is this an Encapsulation socket? */ unsigned char no_check6_tx:1,/* Send zero UDP6 checksums on TX? */ - no_check6_rx:1;/* Allow zero UDP6 checksums on RX? */ + no_check6_rx:1,/* Allow zero UDP6 checksums on RX? */ + convert_csum:1;/* On receive, convert checksum + * unnecessary to checksum complete + * if possible. + */ /* * Following member retains the information to create a UDP header * when the socket is uncorked. @@ -98,6 +102,16 @@ static inline bool udp_get_no_check6_rx(struct sock *sk) return udp_sk(sk)->no_check6_rx; } +static inline void udp_set_convert_csum(struct sock *sk, bool val) +{ + udp_sk(sk)->convert_csum = val; +} + +static inline bool udp_get_convert_csum(struct sock *sk) +{ + return udp_sk(sk)->convert_csum; +} + #define udp_portaddr_for_each_entry(__sk, node, list) \ hlist_nulls_for_each_entry(__sk, node, list, __sk_common.skc_portaddr_node) diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 3549c21fe5f7..0da3849fd35b 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1788,6 +1788,10 @@ int __udp4_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, if (sk != NULL) { int ret; + if (udp_sk(sk)->convert_csum && uh->check && !IS_UDPLITE(sk)) + skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check, + inet_compute_pseudo); + ret = udp_queue_rcv_skb(sk, skb); sock_put(sk); diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index a6adff98382a..84e0e05c9c0e 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -290,16 +290,25 @@ static struct sk_buff **udp4_gro_receive(struct sk_buff **head, { struct udphdr *uh = udp_gro_udphdr(skb); - /* Don't bother verifying checksum if we're going to flush anyway. */ - if (unlikely(!uh) || - (!NAPI_GRO_CB(skb)->flush && - skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check, - inet_gro_compute_pseudo))) { - NAPI_GRO_CB(skb)->flush = 1; - return NULL; - } + if (unlikely(!uh)) + goto flush; + /* Don't bother verifying checksum if we're going to flush anyway. 
*/ + if (!NAPI_GRO_CB(skb)->flush) + goto skip; + + if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check, + inet_gro_compute_pseudo)) + goto flush; + else if (uh->check) + skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check, + inet_gro_compute_pseudo); +skip: return udp_gro_receive(head, skb, uh); + +flush: + NAPI_GRO_CB(skb)->flush = 1; + return NULL; } int udp_gro_complete(struct sk_buff *skb, int nhoff) diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 12fcce8fba46..f6ba535b6feb 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -891,6 +891,10 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, goto csum_error; } + if (udp_sk(sk)->convert_csum && uh->check && !IS_UDPLITE(sk)) + skb_checksum_try_convert(skb, IPPROTO_UDP, uh->check, + ip6_compute_pseudo); + ret = udpv6_queue_rcv_skb(sk, skb); sock_put(sk); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index b13e377e9c53..89cb9a9b8537 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -134,16 +134,26 @@ static struct sk_buff **udp6_gro_receive(struct sk_buff **head, { struct udphdr *uh = udp_gro_udphdr(skb); + if (unlikely(!uh)) + goto flush; + /* Don't bother verifying checksum if we're going to flush anyway. */ - if (unlikely(!uh) || - (!NAPI_GRO_CB(skb)->flush && - skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check, - ip6_gro_compute_pseudo))) { - NAPI_GRO_CB(skb)->flush = 1; - return NULL; - } + if (!NAPI_GRO_CB(skb)->flush) + goto skip; + if (skb_gro_checksum_validate_zero_check(skb, IPPROTO_UDP, uh->check, + ip6_gro_compute_pseudo)) + goto flush; + else if (uh->check) + skb_gro_checksum_try_convert(skb, IPPROTO_UDP, uh->check, + ip6_gro_compute_pseudo); + +skip: return udp_gro_receive(head, skb, uh); + +flush: + NAPI_GRO_CB(skb)->flush = 1; + return NULL; } int udp6_gro_complete(struct sk_buff *skb, int nhoff) -- cgit v1.2.3 From fbff66108352d19b5cffa7dce26d7638c9dd4d70 Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Thu, 28 Aug 2014 04:43:09 -0700 Subject: security: Silence shadow warning Renaming an unused formal parameter in the static inline function security_inode_init_security eliminates many W=2 warnings. Signed-off-by: Mark Rustad Signed-off-by: Jeff Kirsher Signed-off-by: James Morris --- include/linux/security.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 623f90e5f38d..3b3aeb1b74cb 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -2108,7 +2108,7 @@ static inline int security_dentry_init_security(struct dentry *dentry, static inline int security_inode_init_security(struct inode *inode, struct inode *dir, const struct qstr *qstr, - const initxattrs initxattrs, + const initxattrs xattrs, void *fs_data) { return 0; -- cgit v1.2.3 From ea2fdf842365066c82ab941086c6a1741ced4f2a Mon Sep 17 00:00:00 2001 From: Bartlomiej Zolnierkiewicz Date: Thu, 28 Aug 2014 13:58:53 +0200 Subject: usb: phy: samsung: remove old common USB PHY code drivers/usb/phy/phy-samsung-usb[2,3] drivers got replaced by drivers/phy/phy-samsung-usb[2,3] ones and the old common Samsung USB PHY code is no longer used. 
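A note on the checksum-conversion series above: from a tunnel driver's point of view the whole mechanism reduces to a single opt-in call on the encapsulation socket, after which the patched __udp4_lib_rcv()/__udp6_lib_rcv() and GRO paths perform the conversion automatically. A minimal sketch, assuming a driver-private setup helper (the helper name is invented; udp_set_convert_csum() is the API added above):

    #include <linux/udp.h>

    /* Opt a kernel UDP encapsulation socket in to having
     * CHECKSUM_UNNECESSARY upgraded to CHECKSUM_COMPLETE on receive.
     * Datagrams with a zero UDP checksum are left alone, since they
     * carry no checksum value that could be folded into skb->csum.
     */
    static void my_tunnel_setup_sock(struct sock *sk)
    {
            udp_set_convert_csum(sk, true);
    }

The payoff comes further up the stack: even when the NIC only reported CHECKSUM_UNNECESSARY, an inner protocol behind the tunnel header still finds a usable skb->csum to validate against.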
Signed-off-by: Bartlomiej Zolnierkiewicz Acked-by: Kyungmin Park Reviewed-by: Vivek Gautam Reviewed-by: Jingoo Han Acked-by: Kishon Vijay Abraham I Cc: Kamil Debski Signed-off-by: Felipe Balbi --- drivers/usb/phy/phy-samsung-usb.c | 241 -------------------- drivers/usb/phy/phy-samsung-usb.h | 317 --------------------------- include/linux/platform_data/samsung-usbphy.h | 27 --- 3 files changed, 585 deletions(-) delete mode 100644 drivers/usb/phy/phy-samsung-usb.c delete mode 100644 drivers/usb/phy/phy-samsung-usb.h delete mode 100644 include/linux/platform_data/samsung-usbphy.h (limited to 'include/linux') diff --git a/drivers/usb/phy/phy-samsung-usb.c b/drivers/usb/phy/phy-samsung-usb.c deleted file mode 100644 index ac025ca08425..000000000000 --- a/drivers/usb/phy/phy-samsung-usb.c +++ /dev/null @@ -1,241 +0,0 @@ -/* linux/drivers/usb/phy/phy-samsung-usb.c - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * http://www.samsung.com - * - * Author: Praveen Paneri - * - * Samsung USB-PHY helper driver with common function calls; - * interacts with Samsung USB 2.0 PHY controller driver and later - * with Samsung USB 3.0 PHY driver. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#include "phy-samsung-usb.h" - -int samsung_usbphy_parse_dt(struct samsung_usbphy *sphy) -{ - struct device_node *usbphy_sys; - - /* Getting node for system controller interface for usb-phy */ - usbphy_sys = of_get_child_by_name(sphy->dev->of_node, "usbphy-sys"); - if (!usbphy_sys) { - dev_err(sphy->dev, "No sys-controller interface for usb-phy\n"); - return -ENODEV; - } - - sphy->pmuregs = of_iomap(usbphy_sys, 0); - - if (sphy->pmuregs == NULL) { - dev_err(sphy->dev, "Can't get usb-phy pmu control register\n"); - goto err0; - } - - sphy->sysreg = of_iomap(usbphy_sys, 1); - - /* - * Not returning error code here, since this situation is not fatal. - * Few SoCs may not have this switch available - */ - if (sphy->sysreg == NULL) - dev_warn(sphy->dev, "Can't get usb-phy sysreg cfg register\n"); - - of_node_put(usbphy_sys); - - return 0; - -err0: - of_node_put(usbphy_sys); - return -ENXIO; -} -EXPORT_SYMBOL_GPL(samsung_usbphy_parse_dt); - -/* - * Set isolation here for phy. - * Here 'on = true' would mean USB PHY block is isolated, hence - * de-activated and vice-versa. 
- */ -void samsung_usbphy_set_isolation_4210(struct samsung_usbphy *sphy, bool on) -{ - void __iomem *reg = NULL; - u32 reg_val; - u32 en_mask = 0; - - if (!sphy->pmuregs) { - dev_warn(sphy->dev, "Can't set pmu isolation\n"); - return; - } - - if (sphy->phy_type == USB_PHY_TYPE_DEVICE) { - reg = sphy->pmuregs + sphy->drv_data->devphy_reg_offset; - en_mask = sphy->drv_data->devphy_en_mask; - } else if (sphy->phy_type == USB_PHY_TYPE_HOST) { - reg = sphy->pmuregs + sphy->drv_data->hostphy_reg_offset; - en_mask = sphy->drv_data->hostphy_en_mask; - } - - reg_val = readl(reg); - - if (on) - reg_val &= ~en_mask; - else - reg_val |= en_mask; - - writel(reg_val, reg); - - if (sphy->drv_data->cpu_type == TYPE_EXYNOS4X12) { - writel(reg_val, sphy->pmuregs + EXYNOS4X12_PHY_HSIC_CTRL0); - writel(reg_val, sphy->pmuregs + EXYNOS4X12_PHY_HSIC_CTRL1); - } -} -EXPORT_SYMBOL_GPL(samsung_usbphy_set_isolation_4210); - -/* - * Configure the mode of working of usb-phy here: HOST/DEVICE. - */ -void samsung_usbphy_cfg_sel(struct samsung_usbphy *sphy) -{ - u32 reg; - - if (!sphy->sysreg) { - dev_warn(sphy->dev, "Can't configure specified phy mode\n"); - return; - } - - reg = readl(sphy->sysreg); - - if (sphy->phy_type == USB_PHY_TYPE_DEVICE) - reg &= ~EXYNOS_USB20PHY_CFG_HOST_LINK; - else if (sphy->phy_type == USB_PHY_TYPE_HOST) - reg |= EXYNOS_USB20PHY_CFG_HOST_LINK; - - writel(reg, sphy->sysreg); -} -EXPORT_SYMBOL_GPL(samsung_usbphy_cfg_sel); - -/* - * PHYs are different for USB Device and USB Host. - * This make sure that correct PHY type is selected before - * any operation on PHY. - */ -int samsung_usbphy_set_type(struct usb_phy *phy, - enum samsung_usb_phy_type phy_type) -{ - struct samsung_usbphy *sphy = phy_to_sphy(phy); - - sphy->phy_type = phy_type; - - return 0; -} -EXPORT_SYMBOL_GPL(samsung_usbphy_set_type); - -int samsung_usbphy_rate_to_clksel_64xx(struct samsung_usbphy *sphy, - unsigned long rate) -{ - unsigned int clksel; - - switch (rate) { - case 12 * MHZ: - clksel = PHYCLK_CLKSEL_12M; - break; - case 24 * MHZ: - clksel = PHYCLK_CLKSEL_24M; - break; - case 48 * MHZ: - clksel = PHYCLK_CLKSEL_48M; - break; - default: - dev_err(sphy->dev, - "Invalid reference clock frequency: %lu\n", rate); - return -EINVAL; - } - - return clksel; -} -EXPORT_SYMBOL_GPL(samsung_usbphy_rate_to_clksel_64xx); - -int samsung_usbphy_rate_to_clksel_4x12(struct samsung_usbphy *sphy, - unsigned long rate) -{ - unsigned int clksel; - - switch (rate) { - case 9600 * KHZ: - clksel = FSEL_CLKSEL_9600K; - break; - case 10 * MHZ: - clksel = FSEL_CLKSEL_10M; - break; - case 12 * MHZ: - clksel = FSEL_CLKSEL_12M; - break; - case 19200 * KHZ: - clksel = FSEL_CLKSEL_19200K; - break; - case 20 * MHZ: - clksel = FSEL_CLKSEL_20M; - break; - case 24 * MHZ: - clksel = FSEL_CLKSEL_24M; - break; - case 50 * MHZ: - clksel = FSEL_CLKSEL_50M; - break; - default: - dev_err(sphy->dev, - "Invalid reference clock frequency: %lu\n", rate); - return -EINVAL; - } - - return clksel; -} -EXPORT_SYMBOL_GPL(samsung_usbphy_rate_to_clksel_4x12); - -/* - * Returns reference clock frequency selection value - */ -int samsung_usbphy_get_refclk_freq(struct samsung_usbphy *sphy) -{ - struct clk *ref_clk; - unsigned long rate; - int refclk_freq; - - /* - * In exynos5250 USB host and device PHY use - * external crystal clock XXTI - */ - if (sphy->drv_data->cpu_type == TYPE_EXYNOS5250) - ref_clk = clk_get(sphy->dev, "ext_xtal"); - else - ref_clk = clk_get(sphy->dev, "xusbxti"); - if (IS_ERR(ref_clk)) { - dev_err(sphy->dev, "Failed to get reference clock\n"); - return 
PTR_ERR(ref_clk); - } - - rate = clk_get_rate(ref_clk); - refclk_freq = sphy->drv_data->rate_to_clksel(sphy, rate); - - clk_put(ref_clk); - - return refclk_freq; -} -EXPORT_SYMBOL_GPL(samsung_usbphy_get_refclk_freq); diff --git a/drivers/usb/phy/phy-samsung-usb.h b/drivers/usb/phy/phy-samsung-usb.h deleted file mode 100644 index 4eef45555971..000000000000 --- a/drivers/usb/phy/phy-samsung-usb.h +++ /dev/null @@ -1,317 +0,0 @@ -/* linux/drivers/usb/phy/phy-samsung-usb.h - * - * Copyright (c) 2012 Samsung Electronics Co., Ltd. - * http://www.samsung.com - * - * Samsung USB-PHY transceiver; talks to S3C HS OTG controller, EHCI-S5P and - * OHCI-EXYNOS controllers. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#include - -/* Register definitions */ - -#define SAMSUNG_PHYPWR (0x00) -#define PHYPWR_NORMAL_MASK (0x19 << 0) -#define PHYPWR_OTG_DISABLE (0x1 << 4) -#define PHYPWR_ANALOG_POWERDOWN (0x1 << 3) -#define PHYPWR_FORCE_SUSPEND (0x1 << 1) -/* For Exynos4 */ -#define PHYPWR_NORMAL_MASK_PHY0 (0x39 << 0) -#define PHYPWR_SLEEP_PHY0 (0x1 << 5) - -#define SAMSUNG_PHYCLK (0x04) -#define PHYCLK_MODE_USB11 (0x1 << 6) -#define PHYCLK_EXT_OSC (0x1 << 5) -#define PHYCLK_COMMON_ON_N (0x1 << 4) -#define PHYCLK_ID_PULL (0x1 << 2) -#define PHYCLK_CLKSEL_MASK (0x3 << 0) -#define PHYCLK_CLKSEL_48M (0x0 << 0) -#define PHYCLK_CLKSEL_12M (0x2 << 0) -#define PHYCLK_CLKSEL_24M (0x3 << 0) - -#define SAMSUNG_RSTCON (0x08) -#define RSTCON_PHYLINK_SWRST (0x1 << 2) -#define RSTCON_HLINK_SWRST (0x1 << 1) -#define RSTCON_SWRST (0x1 << 0) - -/* EXYNOS4X12 */ -#define EXYNOS4X12_PHY_HSIC_CTRL0 (0x04) -#define EXYNOS4X12_PHY_HSIC_CTRL1 (0x08) -#define PHYPWR_NORMAL_MASK_HSIC1 (0x7 << 12) -#define PHYPWR_NORMAL_MASK_HSIC0 (0x7 << 9) -#define PHYPWR_NORMAL_MASK_PHY1 (0x7 << 6) -#define RSTCON_HOSTPHY_SWRST (0xf << 3) - -/* EXYNOS5 */ -#define EXYNOS5_PHY_HOST_CTRL0 (0x00) -#define HOST_CTRL0_PHYSWRSTALL (0x1 << 31) -#define HOST_CTRL0_REFCLKSEL_MASK (0x3 << 19) -#define HOST_CTRL0_REFCLKSEL_XTAL (0x0 << 19) -#define HOST_CTRL0_REFCLKSEL_EXTL (0x1 << 19) -#define HOST_CTRL0_REFCLKSEL_CLKCORE (0x2 << 19) -#define HOST_CTRL0_FSEL_MASK (0x7 << 16) -#define HOST_CTRL0_FSEL(_x) ((_x) << 16) -#define FSEL_CLKSEL_50M (0x7) -#define FSEL_CLKSEL_24M (0x5) -#define FSEL_CLKSEL_20M (0x4) -#define FSEL_CLKSEL_19200K (0x3) -#define FSEL_CLKSEL_12M (0x2) -#define FSEL_CLKSEL_10M (0x1) -#define FSEL_CLKSEL_9600K (0x0) -#define HOST_CTRL0_TESTBURNIN (0x1 << 11) -#define HOST_CTRL0_RETENABLE (0x1 << 10) -#define HOST_CTRL0_COMMONON_N (0x1 << 9) -#define HOST_CTRL0_SIDDQ (0x1 << 6) -#define HOST_CTRL0_FORCESLEEP (0x1 << 5) -#define HOST_CTRL0_FORCESUSPEND (0x1 << 4) -#define HOST_CTRL0_WORDINTERFACE (0x1 << 3) -#define HOST_CTRL0_UTMISWRST (0x1 << 2) -#define HOST_CTRL0_LINKSWRST (0x1 << 1) -#define HOST_CTRL0_PHYSWRST (0x1 << 0) - -#define EXYNOS5_PHY_HOST_TUNE0 (0x04) - -#define EXYNOS5_PHY_HSIC_CTRL1 (0x10) - -#define EXYNOS5_PHY_HSIC_TUNE1 (0x14) - -#define EXYNOS5_PHY_HSIC_CTRL2 (0x20) - -#define EXYNOS5_PHY_HSIC_TUNE2 (0x24) -#define HSIC_CTRL_REFCLKSEL_MASK (0x3 << 23) -#define HSIC_CTRL_REFCLKSEL (0x2 << 23) -#define 
HSIC_CTRL_REFCLKDIV_MASK (0x7f << 16) -#define HSIC_CTRL_REFCLKDIV(_x) ((_x) << 16) -#define HSIC_CTRL_REFCLKDIV_12 (0x24 << 16) -#define HSIC_CTRL_REFCLKDIV_15 (0x1c << 16) -#define HSIC_CTRL_REFCLKDIV_16 (0x1a << 16) -#define HSIC_CTRL_REFCLKDIV_19_2 (0x15 << 16) -#define HSIC_CTRL_REFCLKDIV_20 (0x14 << 16) -#define HSIC_CTRL_SIDDQ (0x1 << 6) -#define HSIC_CTRL_FORCESLEEP (0x1 << 5) -#define HSIC_CTRL_FORCESUSPEND (0x1 << 4) -#define HSIC_CTRL_WORDINTERFACE (0x1 << 3) -#define HSIC_CTRL_UTMISWRST (0x1 << 2) -#define HSIC_CTRL_PHYSWRST (0x1 << 0) - -#define EXYNOS5_PHY_HOST_EHCICTRL (0x30) -#define HOST_EHCICTRL_ENAINCRXALIGN (0x1 << 29) -#define HOST_EHCICTRL_ENAINCR4 (0x1 << 28) -#define HOST_EHCICTRL_ENAINCR8 (0x1 << 27) -#define HOST_EHCICTRL_ENAINCR16 (0x1 << 26) - -#define EXYNOS5_PHY_HOST_OHCICTRL (0x34) -#define HOST_OHCICTRL_SUSPLGCY (0x1 << 3) -#define HOST_OHCICTRL_APPSTARTCLK (0x1 << 2) -#define HOST_OHCICTRL_CNTSEL (0x1 << 1) -#define HOST_OHCICTRL_CLKCKTRST (0x1 << 0) - -#define EXYNOS5_PHY_OTG_SYS (0x38) -#define OTG_SYS_PHYLINK_SWRESET (0x1 << 14) -#define OTG_SYS_LINKSWRST_UOTG (0x1 << 13) -#define OTG_SYS_PHY0_SWRST (0x1 << 12) -#define OTG_SYS_REFCLKSEL_MASK (0x3 << 9) -#define OTG_SYS_REFCLKSEL_XTAL (0x0 << 9) -#define OTG_SYS_REFCLKSEL_EXTL (0x1 << 9) -#define OTG_SYS_REFCLKSEL_CLKCORE (0x2 << 9) -#define OTG_SYS_IDPULLUP_UOTG (0x1 << 8) -#define OTG_SYS_COMMON_ON (0x1 << 7) -#define OTG_SYS_FSEL_MASK (0x7 << 4) -#define OTG_SYS_FSEL(_x) ((_x) << 4) -#define OTG_SYS_FORCESLEEP (0x1 << 3) -#define OTG_SYS_OTGDISABLE (0x1 << 2) -#define OTG_SYS_SIDDQ_UOTG (0x1 << 1) -#define OTG_SYS_FORCESUSPEND (0x1 << 0) - -#define EXYNOS5_PHY_OTG_TUNE (0x40) - -/* EXYNOS5: USB 3.0 DRD */ -#define EXYNOS5_DRD_LINKSYSTEM (0x04) -#define LINKSYSTEM_FLADJ_MASK (0x3f << 1) -#define LINKSYSTEM_FLADJ(_x) ((_x) << 1) -#define LINKSYSTEM_XHCI_VERSION_CONTROL (0x1 << 27) - -#define EXYNOS5_DRD_PHYUTMI (0x08) -#define PHYUTMI_OTGDISABLE (0x1 << 6) -#define PHYUTMI_FORCESUSPEND (0x1 << 1) -#define PHYUTMI_FORCESLEEP (0x1 << 0) - -#define EXYNOS5_DRD_PHYPIPE (0x0c) - -#define EXYNOS5_DRD_PHYCLKRST (0x10) -#define PHYCLKRST_SSC_REFCLKSEL_MASK (0xff << 23) -#define PHYCLKRST_SSC_REFCLKSEL(_x) ((_x) << 23) -#define PHYCLKRST_SSC_RANGE_MASK (0x03 << 21) -#define PHYCLKRST_SSC_RANGE(_x) ((_x) << 21) -#define PHYCLKRST_SSC_EN (0x1 << 20) -#define PHYCLKRST_REF_SSP_EN (0x1 << 19) -#define PHYCLKRST_REF_CLKDIV2 (0x1 << 18) -#define PHYCLKRST_MPLL_MULTIPLIER_MASK (0x7f << 11) -#define PHYCLKRST_MPLL_MULTIPLIER_100MHZ_REF (0x19 << 11) -#define PHYCLKRST_MPLL_MULTIPLIER_50M_REF (0x02 << 11) -#define PHYCLKRST_MPLL_MULTIPLIER_24MHZ_REF (0x68 << 11) -#define PHYCLKRST_MPLL_MULTIPLIER_20MHZ_REF (0x7d << 11) -#define PHYCLKRST_MPLL_MULTIPLIER_19200KHZ_REF (0x02 << 11) -#define PHYCLKRST_FSEL_MASK (0x3f << 5) -#define PHYCLKRST_FSEL(_x) ((_x) << 5) -#define PHYCLKRST_FSEL_PAD_100MHZ (0x27 << 5) -#define PHYCLKRST_FSEL_PAD_24MHZ (0x2a << 5) -#define PHYCLKRST_FSEL_PAD_20MHZ (0x31 << 5) -#define PHYCLKRST_FSEL_PAD_19_2MHZ (0x38 << 5) -#define PHYCLKRST_RETENABLEN (0x1 << 4) -#define PHYCLKRST_REFCLKSEL_MASK (0x03 << 2) -#define PHYCLKRST_REFCLKSEL_PAD_REFCLK (0x2 << 2) -#define PHYCLKRST_REFCLKSEL_EXT_REFCLK (0x3 << 2) -#define PHYCLKRST_PORTRESET (0x1 << 1) -#define PHYCLKRST_COMMONONN (0x1 << 0) - -#define EXYNOS5_DRD_PHYREG0 (0x14) - -#define EXYNOS5_DRD_PHYREG1 (0x18) - -#define EXYNOS5_DRD_PHYPARAM0 (0x1c) -#define PHYPARAM0_REF_USE_PAD (0x1 << 31) -#define PHYPARAM0_REF_LOSLEVEL_MASK (0x1f << 26) -#define 
PHYPARAM0_REF_LOSLEVEL (0x9 << 26) - -#define EXYNOS5_DRD_PHYPARAM1 (0x20) -#define PHYPARAM1_PCS_TXDEEMPH_MASK (0x1f << 0) -#define PHYPARAM1_PCS_TXDEEMPH (0x1c) - -#define EXYNOS5_DRD_PHYTERM (0x24) - -#define EXYNOS5_DRD_PHYTEST (0x28) -#define PHYTEST_POWERDOWN_SSP (0x1 << 3) -#define PHYTEST_POWERDOWN_HSP (0x1 << 2) - -#define EXYNOS5_DRD_PHYADP (0x2c) - -#define EXYNOS5_DRD_PHYBATCHG (0x30) -#define PHYBATCHG_UTMI_CLKSEL (0x1 << 2) - -#define EXYNOS5_DRD_PHYRESUME (0x34) - -#define EXYNOS5_DRD_LINKPORT (0x44) - -#ifndef MHZ -#define MHZ (1000*1000) -#endif - -#ifndef KHZ -#define KHZ (1000) -#endif - -#define EXYNOS_USBHOST_PHY_CTRL_OFFSET (0x4) -#define S3C64XX_USBPHY_ENABLE (0x1 << 16) -#define EXYNOS_USBPHY_ENABLE (0x1 << 0) -#define EXYNOS_USB20PHY_CFG_HOST_LINK (0x1 << 0) - -enum samsung_cpu_type { - TYPE_S3C64XX, - TYPE_EXYNOS4210, - TYPE_EXYNOS4X12, - TYPE_EXYNOS5250, -}; - -struct samsung_usbphy; - -/* - * struct samsung_usbphy_drvdata - driver data for various SoC variants - * @cpu_type: machine identifier - * @devphy_en_mask: device phy enable mask for PHY CONTROL register - * @hostphy_en_mask: host phy enable mask for PHY CONTROL register - * @devphy_reg_offset: offset to DEVICE PHY CONTROL register from - * mapped address of system controller. - * @hostphy_reg_offset: offset to HOST PHY CONTROL register from - * mapped address of system controller. - * - * Here we have a separate mask for device type phy. - * Having different masks for host and device type phy helps - * in setting independent masks in case of SoCs like S5PV210, - * in which PHY0 and PHY1 enable bits belong to same register - * placed at position 0 and 1 respectively. - * Although for newer SoCs like exynos these bits belong to - * different registers altogether placed at position 0. 
- */ -struct samsung_usbphy_drvdata { - int cpu_type; - int devphy_en_mask; - int hostphy_en_mask; - u32 devphy_reg_offset; - u32 hostphy_reg_offset; - int (*rate_to_clksel)(struct samsung_usbphy *, unsigned long); - void (*set_isolation)(struct samsung_usbphy *, bool); - void (*phy_enable)(struct samsung_usbphy *); - void (*phy_disable)(struct samsung_usbphy *); -}; - -/* - * struct samsung_usbphy - transceiver driver state - * @phy: transceiver structure - * @plat: platform data - * @dev: The parent device supplied to the probe function - * @clk: usb phy clock - * @regs: usb phy controller registers memory base - * @pmuregs: USB device PHY_CONTROL register memory base - * @sysreg: USB2.0 PHY_CFG register memory base - * @ref_clk_freq: reference clock frequency selection - * @drv_data: driver data available for different SoCs - * @phy_type: Samsung SoCs specific phy types: #HOST - * #DEVICE - * @phy_usage: usage count for phy - * @lock: lock for phy operations - */ -struct samsung_usbphy { - struct usb_phy phy; - struct samsung_usbphy_data *plat; - struct device *dev; - struct clk *clk; - void __iomem *regs; - void __iomem *pmuregs; - void __iomem *sysreg; - int ref_clk_freq; - const struct samsung_usbphy_drvdata *drv_data; - enum samsung_usb_phy_type phy_type; - atomic_t phy_usage; - spinlock_t lock; -}; - -#define phy_to_sphy(x) container_of((x), struct samsung_usbphy, phy) - -static const struct of_device_id samsung_usbphy_dt_match[]; - -static inline const struct samsung_usbphy_drvdata -*samsung_usbphy_get_driver_data(struct platform_device *pdev) -{ - if (pdev->dev.of_node) { - const struct of_device_id *match; - match = of_match_node(samsung_usbphy_dt_match, - pdev->dev.of_node); - return match->data; - } - - return (struct samsung_usbphy_drvdata *) - platform_get_device_id(pdev)->driver_data; -} - -extern int samsung_usbphy_parse_dt(struct samsung_usbphy *sphy); -extern void samsung_usbphy_set_isolation_4210(struct samsung_usbphy *sphy, - bool on); -extern void samsung_usbphy_cfg_sel(struct samsung_usbphy *sphy); -extern int samsung_usbphy_set_type(struct usb_phy *phy, - enum samsung_usb_phy_type phy_type); -extern int samsung_usbphy_get_refclk_freq(struct samsung_usbphy *sphy); -extern int samsung_usbphy_rate_to_clksel_64xx(struct samsung_usbphy *sphy, - unsigned long rate); -extern int samsung_usbphy_rate_to_clksel_4x12(struct samsung_usbphy *sphy, - unsigned long rate); diff --git a/include/linux/platform_data/samsung-usbphy.h b/include/linux/platform_data/samsung-usbphy.h deleted file mode 100644 index 1bd24cba982b..000000000000 --- a/include/linux/platform_data/samsung-usbphy.h +++ /dev/null @@ -1,27 +0,0 @@ -/* - * Copyright (C) 2012 Samsung Electronics Co.Ltd - * http://www.samsung.com/ - * Author: Praveen Paneri - * - * Defines platform data for samsung usb phy driver. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License as published by the - * Free Software Foundation; either version 2 of the License, or (at your - * option) any later version. - */ - -#ifndef __SAMSUNG_USBPHY_PLATFORM_H -#define __SAMSUNG_USBPHY_PLATFORM_H - -/** - * samsung_usbphy_data - Platform data for USB PHY driver. - * @pmu_isolation: Function to control usb phy isolation in PMU. 
- */ -struct samsung_usbphy_data { - void (*pmu_isolation)(int on); -}; - -extern void samsung_usbphy_set_pdata(struct samsung_usbphy_data *pd); - -#endif /* __SAMSUNG_USBPHY_PLATFORM_H */ -- cgit v1.2.3 From 5835d96e9ce4efdba8c6cefffc2f1575925456de Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 2 Sep 2014 14:46:04 -0400 Subject: percpu: implement [__]alloc_percpu_gfp() Now that pcpu_alloc_area() can allocate only from populated areas, it's easy to add atomic allocation support to [__]alloc_percpu(). Update pcpu_alloc() so that it accepts @gfp and skips all the blocking operations and allocates only from the populated areas if @gfp doesn't contain GFP_KERNEL. New interface functions [__]alloc_percpu_gfp() are added. While this means that atomic allocations are possible, this isn't complete yet as there's no mechanism to ensure that certain amount of populated areas is kept available and atomic allocations may keep failing under certain conditions. Signed-off-by: Tejun Heo --- include/linux/percpu.h | 9 +++++-- mm/percpu.c | 64 ++++++++++++++++++++++++++++++-------------------- 2 files changed, 46 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 6f61b61b7996..d1b416da25ed 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -122,11 +122,16 @@ extern void __init setup_per_cpu_areas(void); #endif extern void __init percpu_init_late(void); +extern void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp); extern void __percpu *__alloc_percpu(size_t size, size_t align); extern void free_percpu(void __percpu *__pdata); extern phys_addr_t per_cpu_ptr_to_phys(void *addr); -#define alloc_percpu(type) \ - (typeof(type) __percpu *)__alloc_percpu(sizeof(type), __alignof__(type)) +#define alloc_percpu_gfp(type, gfp) \ + (typeof(type) __percpu *)__alloc_percpu_gfp(sizeof(type), \ + __alignof__(type), gfp) +#define alloc_percpu(type) \ + (typeof(type) __percpu *)__alloc_percpu(sizeof(type), \ + __alignof__(type)) #endif /* __LINUX_PERCPU_H */ diff --git a/mm/percpu.c b/mm/percpu.c index 577d84fb3002..c52b93117dc2 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -151,11 +151,6 @@ static struct pcpu_chunk *pcpu_first_chunk; static struct pcpu_chunk *pcpu_reserved_chunk; static int pcpu_reserved_chunk_limit; -/* - * Free path accesses and alters only the index data structures and can be - * safely called from atomic context. When memory needs to be returned to - * the system, free path schedules reclaim_work. - */ static DEFINE_SPINLOCK(pcpu_lock); /* all internal data structures */ static DEFINE_MUTEX(pcpu_alloc_mutex); /* chunk create/destroy, [de]pop */ @@ -727,20 +722,21 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) * @size: size of area to allocate in bytes * @align: alignment of area (max PAGE_SIZE) * @reserved: allocate from the reserved chunk if available + * @gfp: allocation flags * - * Allocate percpu area of @size bytes aligned at @align. - * - * CONTEXT: - * Does GFP_KERNEL allocation. + * Allocate percpu area of @size bytes aligned at @align. If @gfp doesn't + * contain %GFP_KERNEL, the allocation is atomic. * * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. 
*/ -static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved) +static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, + gfp_t gfp) { static int warn_limit = 10; struct pcpu_chunk *chunk; const char *err; + bool is_atomic = !(gfp & GFP_KERNEL); int slot, off, new_alloc, cpu, ret; unsigned long flags; void __percpu *ptr; @@ -773,14 +769,15 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved) while ((new_alloc = pcpu_need_to_extend(chunk))) { spin_unlock_irqrestore(&pcpu_lock, flags); - if (pcpu_extend_area_map(chunk, new_alloc) < 0) { + if (is_atomic || + pcpu_extend_area_map(chunk, new_alloc) < 0) { err = "failed to extend area map of reserved chunk"; goto fail; } spin_lock_irqsave(&pcpu_lock, flags); } - off = pcpu_alloc_area(chunk, size, align, false); + off = pcpu_alloc_area(chunk, size, align, is_atomic); if (off >= 0) goto area_found; @@ -797,6 +794,8 @@ restart: new_alloc = pcpu_need_to_extend(chunk); if (new_alloc) { + if (is_atomic) + continue; spin_unlock_irqrestore(&pcpu_lock, flags); if (pcpu_extend_area_map(chunk, new_alloc) < 0) { @@ -811,7 +810,7 @@ restart: goto restart; } - off = pcpu_alloc_area(chunk, size, align, false); + off = pcpu_alloc_area(chunk, size, align, is_atomic); if (off >= 0) goto area_found; } @@ -824,6 +823,9 @@ restart: * tasks to create chunks simultaneously. Serialize and create iff * there's still no empty chunk after grabbing the mutex. */ + if (is_atomic) + goto fail; + mutex_lock(&pcpu_alloc_mutex); if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) { @@ -846,7 +848,7 @@ area_found: spin_unlock_irqrestore(&pcpu_lock, flags); /* populate if not all pages are already there */ - if (true) { + if (!is_atomic) { int page_start, page_end, rs, re; mutex_lock(&pcpu_alloc_mutex); @@ -884,9 +886,9 @@ area_found: fail_unlock: spin_unlock_irqrestore(&pcpu_lock, flags); fail: - if (warn_limit) { - pr_warning("PERCPU: allocation failed, size=%zu align=%zu, " - "%s\n", size, align, err); + if (!is_atomic && warn_limit) { + pr_warning("PERCPU: allocation failed, size=%zu align=%zu atomic=%d, %s\n", + size, align, is_atomic, err); dump_stack(); if (!--warn_limit) pr_info("PERCPU: limit reached, disable warning\n"); @@ -895,22 +897,34 @@ fail: } /** - * __alloc_percpu - allocate dynamic percpu area + * __alloc_percpu_gfp - allocate dynamic percpu area * @size: size of area to allocate in bytes * @align: alignment of area (max PAGE_SIZE) + * @gfp: allocation flags * - * Allocate zero-filled percpu area of @size bytes aligned at @align. - * Might sleep. Might trigger writeouts. - * - * CONTEXT: - * Does GFP_KERNEL allocation. + * Allocate zero-filled percpu area of @size bytes aligned at @align. If + * @gfp doesn't contain %GFP_KERNEL, the allocation doesn't block and can + * be called from any context but is a lot more likely to fail. * * RETURNS: * Percpu pointer to the allocated area on success, NULL on failure. */ +void __percpu *__alloc_percpu_gfp(size_t size, size_t align, gfp_t gfp) +{ + return pcpu_alloc(size, align, false, gfp); +} +EXPORT_SYMBOL_GPL(__alloc_percpu_gfp); + +/** + * __alloc_percpu - allocate dynamic percpu area + * @size: size of area to allocate in bytes + * @align: alignment of area (max PAGE_SIZE) + * + * Equivalent to __alloc_percpu_gfp(size, align, %GFP_KERNEL). 
+ */ void __percpu *__alloc_percpu(size_t size, size_t align) { - return pcpu_alloc(size, align, false); + return pcpu_alloc(size, align, false, GFP_KERNEL); } EXPORT_SYMBOL_GPL(__alloc_percpu); @@ -932,7 +946,7 @@ EXPORT_SYMBOL_GPL(__alloc_percpu); */ void __percpu *__alloc_reserved_percpu(size_t size, size_t align) { - return pcpu_alloc(size, align, true); + return pcpu_alloc(size, align, true, GFP_KERNEL); } /** -- cgit v1.2.3 From 1a4d76076cda69b0abf15463a8cebc172406da25 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 2 Sep 2014 14:46:05 -0400 Subject: percpu: implement asynchronous chunk population The percpu allocator now supports atomic allocations by only allocating from already populated areas but the mechanism to ensure that there's adequate amount of populated areas was missing. This patch expands pcpu_balance_work so that in addition to freeing excess free chunks it also populates chunks to maintain an adequate level of populated areas. pcpu_alloc() schedules pcpu_balance_work if the amount of free populated areas is too low or after an atomic allocation failure. * PERPCU_DYNAMIC_RESERVE is increased by two pages to account for PCPU_EMPTY_POP_PAGES_LOW. * pcpu_async_enabled is added to gate both async jobs - chunk->map_extend_work and pcpu_balance_work - so that we don't end up scheduling them while the needed subsystems aren't up yet. Signed-off-by: Tejun Heo --- include/linux/percpu.h | 4 +- mm/percpu.c | 117 +++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 115 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index d1b416da25ed..a3aa63e47637 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -48,9 +48,9 @@ * intelligent way to determine this would be nice. */ #if BITS_PER_LONG > 32 -#define PERCPU_DYNAMIC_RESERVE (20 << 10) +#define PERCPU_DYNAMIC_RESERVE (28 << 10) #else -#define PERCPU_DYNAMIC_RESERVE (12 << 10) +#define PERCPU_DYNAMIC_RESERVE (20 << 10) #endif extern void *pcpu_base_addr; diff --git a/mm/percpu.c b/mm/percpu.c index 28a830590b4c..867efd38d879 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -78,6 +78,8 @@ #define PCPU_DFL_MAP_ALLOC 16 /* start a map with 16 ents */ #define PCPU_ATOMIC_MAP_MARGIN_LOW 32 #define PCPU_ATOMIC_MAP_MARGIN_HIGH 64 +#define PCPU_EMPTY_POP_PAGES_LOW 2 +#define PCPU_EMPTY_POP_PAGES_HIGH 4 #ifdef CONFIG_SMP /* default addr <-> pcpu_ptr mapping, override in asm/percpu.h if necessary */ @@ -168,9 +170,22 @@ static struct list_head *pcpu_slot __read_mostly; /* chunk list slots */ */ static int pcpu_nr_empty_pop_pages; -/* balance work is used to populate or destroy chunks asynchronously */ +/* + * Balance work is used to populate or destroy chunks asynchronously. We + * try to keep the number of populated free pages between + * PCPU_EMPTY_POP_PAGES_LOW and HIGH for atomic allocations and at most one + * empty chunk. 
+ */ static void pcpu_balance_workfn(struct work_struct *work); static DECLARE_WORK(pcpu_balance_work, pcpu_balance_workfn); +static bool pcpu_async_enabled __read_mostly; +static bool pcpu_atomic_alloc_failed; + +static void pcpu_schedule_balance_work(void) +{ + if (pcpu_async_enabled) + schedule_work(&pcpu_balance_work); +} static bool pcpu_addr_in_first_chunk(void *addr) { @@ -386,7 +401,8 @@ static int pcpu_need_to_extend(struct pcpu_chunk *chunk, bool is_atomic) margin = 3; if (chunk->map_alloc < - chunk->map_used + PCPU_ATOMIC_MAP_MARGIN_LOW) + chunk->map_used + PCPU_ATOMIC_MAP_MARGIN_LOW && + pcpu_async_enabled) schedule_work(&chunk->map_extend_work); } else { margin = PCPU_ATOMIC_MAP_MARGIN_HIGH; @@ -1005,6 +1021,9 @@ area_found: if (chunk != pcpu_reserved_chunk) pcpu_nr_empty_pop_pages -= occ_pages; + if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_LOW) + pcpu_schedule_balance_work(); + /* clear the areas and return address relative to base address */ for_each_possible_cpu(cpu) memset((void *)pcpu_chunk_addr(chunk, cpu, 0) + off, 0, size); @@ -1023,6 +1042,11 @@ fail: if (!--warn_limit) pr_info("PERCPU: limit reached, disable warning\n"); } + if (is_atomic) { + /* see the flag handling in pcpu_blance_workfn() */ + pcpu_atomic_alloc_failed = true; + pcpu_schedule_balance_work(); + } return NULL; } @@ -1080,7 +1104,7 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align) } /** - * pcpu_balance_workfn - reclaim fully free chunks, workqueue function + * pcpu_balance_workfn - manage the amount of free chunks and populated pages * @work: unused * * Reclaim all fully free chunks except for the first one. @@ -1090,7 +1114,12 @@ static void pcpu_balance_workfn(struct work_struct *work) LIST_HEAD(to_free); struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1]; struct pcpu_chunk *chunk, *next; + int slot, nr_to_pop, ret; + /* + * There's no reason to keep around multiple unused chunks and VM + * areas can be scarce. Destroy all free chunks except for one. + */ mutex_lock(&pcpu_alloc_mutex); spin_lock_irq(&pcpu_lock); @@ -1118,6 +1147,74 @@ static void pcpu_balance_workfn(struct work_struct *work) pcpu_destroy_chunk(chunk); } + /* + * Ensure there are certain number of free populated pages for + * atomic allocs. Fill up from the most packed so that atomic + * allocs don't increase fragmentation. If atomic allocation + * failed previously, always populate the maximum amount. This + * should prevent atomic allocs larger than PAGE_SIZE from keeping + * failing indefinitely; however, large atomic allocs are not + * something we support properly and can be highly unreliable and + * inefficient. 
+ */ +retry_pop: + if (pcpu_atomic_alloc_failed) { + nr_to_pop = PCPU_EMPTY_POP_PAGES_HIGH; + /* best effort anyway, don't worry about synchronization */ + pcpu_atomic_alloc_failed = false; + } else { + nr_to_pop = clamp(PCPU_EMPTY_POP_PAGES_HIGH - + pcpu_nr_empty_pop_pages, + 0, PCPU_EMPTY_POP_PAGES_HIGH); + } + + for (slot = pcpu_size_to_slot(PAGE_SIZE); slot < pcpu_nr_slots; slot++) { + int nr_unpop = 0, rs, re; + + if (!nr_to_pop) + break; + + spin_lock_irq(&pcpu_lock); + list_for_each_entry(chunk, &pcpu_slot[slot], list) { + nr_unpop = pcpu_unit_pages - chunk->nr_populated; + if (nr_unpop) + break; + } + spin_unlock_irq(&pcpu_lock); + + if (!nr_unpop) + continue; + + /* @chunk can't go away while pcpu_alloc_mutex is held */ + pcpu_for_each_unpop_region(chunk, rs, re, 0, pcpu_unit_pages) { + int nr = min(re - rs, nr_to_pop); + + ret = pcpu_populate_chunk(chunk, rs, rs + nr); + if (!ret) { + nr_to_pop -= nr; + spin_lock_irq(&pcpu_lock); + pcpu_chunk_populated(chunk, rs, rs + nr); + spin_unlock_irq(&pcpu_lock); + } else { + nr_to_pop = 0; + } + + if (!nr_to_pop) + break; + } + } + + if (nr_to_pop) { + /* ran out of chunks to populate, create a new one and retry */ + chunk = pcpu_create_chunk(); + if (chunk) { + spin_lock_irq(&pcpu_lock); + pcpu_chunk_relocate(chunk, -1); + spin_unlock_irq(&pcpu_lock); + goto retry_pop; + } + } + mutex_unlock(&pcpu_alloc_mutex); } @@ -1160,7 +1257,7 @@ void free_percpu(void __percpu *ptr) list_for_each_entry(pos, &pcpu_slot[pcpu_nr_slots - 1], list) if (pos != chunk) { - schedule_work(&pcpu_balance_work); + pcpu_schedule_balance_work(); break; } } @@ -2187,3 +2284,15 @@ void __init percpu_init_late(void) spin_unlock_irqrestore(&pcpu_lock, flags); } } + +/* + * Percpu allocator is initialized early during boot when neither slab or + * workqueue is available. Plug async management until everything is up + * and running. + */ +static int __init percpu_enable_async(void) +{ + pcpu_async_enabled = true; + return 0; +} +subsys_initcall(percpu_enable_async); -- cgit v1.2.3 From 76ba59f8366f2d9282cb5bda9de75b4b68cbe55f Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 26 Aug 2014 11:03:16 +0100 Subject: genirq: Add irq_domain-aware core IRQ handler Calling irq_find_mapping from outside a irq_{enter,exit} section is unsafe and produces ugly messages if CONFIG_PROVE_RCU is enabled: If coming from the idle state, the rcu_read_lock call in irq_find_mapping will generate an unpleasant warning: =============================== [ INFO: suspicious RCU usage. ] 3.16.0-rc1+ #135 Not tainted ------------------------------- include/linux/rcupdate.h:871 rcu_read_lock() used illegally while idle! other info that might help us debug this: RCU used illegally from idle CPU! rcu_scheduler_active = 1, debug_locks = 0 RCU used illegally from extended quiescent state! 1 lock held by swapper/0/0: #0: (rcu_read_lock){......}, at: [] irq_find_mapping+0x4c/0x198 As this issue is fairly widespread and involves at least three different architectures, a possible solution is to add a new handle_domain_irq entry point into the generic IRQ code that the interrupt controller code can call. This new function takes an irq_domain, and calls into irq_find_domain inside the irq_{enter,exit} block. An additional "lookup" parameter is used to allow non-domain architecture code to be replaced by this as well. Interrupt controllers can then be updated to use the new mechanism. 
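As a sketch of such an update, assuming a made-up controller with one hardware status register (only handle_domain_irq() itself comes from this patch; the chip names, register offset, and the ARM-style __exception_irq_entry annotation are illustrative):

    #include <linux/io.h>
    #include <linux/irqdesc.h>
    #include <asm/exception.h>

    #define MY_CHIP_IRQ_STATUS	0x10	/* hypothetical register */

    static void __iomem *my_chip_base;
    static struct irq_domain *my_chip_domain;

    /* Root handler: read the hwirq number and let the core do the
     * domain lookup inside a proper irq_enter()/irq_exit() section.
     */
    static void __exception_irq_entry my_chip_handle_irq(struct pt_regs *regs)
    {
            u32 hwirq = readl_relaxed(my_chip_base + MY_CHIP_IRQ_STATUS);

            handle_domain_irq(my_chip_domain, hwirq, regs);
    }

Compared with the old open-coded irq_find_mapping() plus generic_handle_irq() sequence, the lookup now runs after irq_enter(), so the rcu_read_lock() in the mapping code no longer trips the idle-state warning quoted above.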
This code is sitting behind a new CONFIG_HANDLE_DOMAIN_IRQ, as not all architectures implement set_irq_regs (yes, mn10300, I'm looking at you...). Reported-by: Vladimir Murzin Signed-off-by: Marc Zyngier Link: https://lkml.kernel.org/r/1409047421-27649-2-git-send-email-marc.zyngier@arm.com Signed-off-by: Jason Cooper --- include/linux/irqdesc.h | 19 +++++++++++++++++++ kernel/irq/Kconfig | 3 +++ kernel/irq/irqdesc.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 64 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 472c021a2d4f..ff24667cd86c 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -12,6 +12,8 @@ struct irq_affinity_notify; struct proc_dir_entry; struct module; struct irq_desc; +struct irq_domain; +struct pt_regs; /** * struct irq_desc - interrupt descriptor @@ -118,6 +120,23 @@ static inline void generic_handle_irq_desc(unsigned int irq, struct irq_desc *de int generic_handle_irq(unsigned int irq); +#ifdef CONFIG_HANDLE_DOMAIN_IRQ +/* + * Convert a HW interrupt number to a logical one using a IRQ domain, + * and handle the result interrupt number. Return -EINVAL if + * conversion failed. Providing a NULL domain indicates that the + * conversion has already been done. + */ +int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq, + bool lookup, struct pt_regs *regs); + +static inline int handle_domain_irq(struct irq_domain *domain, + unsigned int hwirq, struct pt_regs *regs) +{ + return __handle_domain_irq(domain, hwirq, true, regs); +} +#endif + /* Test to see if a driver has successfully requested an irq */ static inline int irq_has_action(unsigned int irq) { diff --git a/kernel/irq/Kconfig b/kernel/irq/Kconfig index d269cecdfbf0..225086b2652e 100644 --- a/kernel/irq/Kconfig +++ b/kernel/irq/Kconfig @@ -55,6 +55,9 @@ config GENERIC_IRQ_CHIP config IRQ_DOMAIN bool +config HANDLE_DOMAIN_IRQ + bool + config IRQ_DOMAIN_DEBUG bool "Expose hardware/virtual IRQ mapping via debugfs" depends on IRQ_DOMAIN && DEBUG_FS diff --git a/kernel/irq/irqdesc.c b/kernel/irq/irqdesc.c index 1487a123db5c..a1782f88f0af 100644 --- a/kernel/irq/irqdesc.c +++ b/kernel/irq/irqdesc.c @@ -14,6 +14,7 @@ #include #include #include +#include #include "internals.h" @@ -336,6 +337,47 @@ int generic_handle_irq(unsigned int irq) } EXPORT_SYMBOL_GPL(generic_handle_irq); +#ifdef CONFIG_HANDLE_DOMAIN_IRQ +/** + * __handle_domain_irq - Invoke the handler for a HW irq belonging to a domain + * @domain: The domain where to perform the lookup + * @hwirq: The HW irq number to convert to a logical one + * @lookup: Whether to perform the domain lookup or not + * @regs: Register file coming from the low-level handling code + * + * Returns: 0 on success, or -EINVAL if conversion has failed + */ +int __handle_domain_irq(struct irq_domain *domain, unsigned int hwirq, + bool lookup, struct pt_regs *regs) +{ + struct pt_regs *old_regs = set_irq_regs(regs); + unsigned int irq = hwirq; + int ret = 0; + + irq_enter(); + +#ifdef CONFIG_IRQ_DOMAIN + if (lookup) + irq = irq_find_mapping(domain, hwirq); +#endif + + /* + * Some hardware gives randomly wrong interrupts. Rather + * than crashing, do something sensible. 
+ */ + if (unlikely(!irq || irq >= nr_irqs)) { + ack_bad_irq(irq); + ret = -EINVAL; + } else { + generic_handle_irq(irq); + } + + irq_exit(); + set_irq_regs(old_regs); + return ret; +} +#endif + /* Dynamic interrupt handling */ /** -- cgit v1.2.3 From a4412fc9486ec85686c6c7929e7e829f62ae377e Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 21 Jul 2014 18:49:14 -0700 Subject: seccomp,x86,arm,mips,s390: Remove nr parameter from secure_computing The secure_computing function took a syscall number parameter, but it only paid any attention to that parameter if seccomp mode 1 was enabled. Rather than coming up with a kludge to get the parameter to work in mode 2, just remove the parameter. To avoid churn in arches that don't have seccomp filters (and may not even support syscall_get_nr right now), this leaves the parameter in secure_computing_strict, which is now a real function. For ARM, this is a bit ugly due to the fact that ARM conditionally supports seccomp filters. Fixing that would probably only be a couple of lines of code, but it should be coordinated with the audit maintainers. This will be a slight slowdown on some arches. The right fix is to pass in all of seccomp_data instead of trying to make just the syscall nr part be fast. This is a prerequisite for making two-phase seccomp work cleanly. Cc: Russell King Cc: linux-arm-kernel@lists.infradead.org Cc: Ralf Baechle Cc: linux-mips@linux-mips.org Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: linux-s390@vger.kernel.org Cc: x86@kernel.org Cc: Kees Cook Signed-off-by: Andy Lutomirski Signed-off-by: Kees Cook --- arch/arm/kernel/ptrace.c | 7 ++++- arch/mips/kernel/ptrace.c | 2 +- arch/s390/kernel/ptrace.c | 2 +- arch/x86/kernel/ptrace.c | 2 +- arch/x86/kernel/vsyscall_64.c | 2 +- include/linux/seccomp.h | 21 +++++++------- kernel/seccomp.c | 64 ++++++++++++++++++++++++++++++------------- 7 files changed, 66 insertions(+), 34 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 0c27ed6f3f23..5e772a21ab97 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -933,8 +933,13 @@ asmlinkage int syscall_trace_enter(struct pt_regs *regs, int scno) current_thread_info()->syscall = scno; /* Do the secure computing check first; failures should be fast. */ - if (secure_computing(scno) == -1) +#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER + if (secure_computing() == -1) return -1; +#else + /* XXX: remove this once OABI gets fixed */ + secure_computing_strict(scno); +#endif if (test_thread_flag(TIF_SYSCALL_TRACE)) tracehook_report_syscall(regs, PTRACE_SYSCALL_ENTER); diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index 645b3c4fcfba..f7aac5b57b4b 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -770,7 +770,7 @@ asmlinkage long syscall_trace_enter(struct pt_regs *regs, long syscall) long ret = 0; user_exit(); - if (secure_computing(syscall) == -1) + if (secure_computing() == -1) return -1; if (test_thread_flag(TIF_SYSCALL_TRACE) && diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 5dc7ad9e2fbf..bebacad48305 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -803,7 +803,7 @@ asmlinkage long do_syscall_trace_enter(struct pt_regs *regs) long ret = 0; /* Do the secure computing check first. */ - if (secure_computing(regs->gprs[2])) { + if (secure_computing()) { /* seccomp failures shouldn't expose any additional code. 
*/ ret = -1; goto out; diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 678c0ada3b3c..93c182a00506 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -1471,7 +1471,7 @@ long syscall_trace_enter(struct pt_regs *regs) regs->flags |= X86_EFLAGS_TF; /* do the secure computing check first */ - if (secure_computing(regs->orig_ax)) { + if (secure_computing()) { /* seccomp failures shouldn't expose any additional code. */ ret = -1L; goto out; diff --git a/arch/x86/kernel/vsyscall_64.c b/arch/x86/kernel/vsyscall_64.c index e1e1e80fc6a6..957779f4eb40 100644 --- a/arch/x86/kernel/vsyscall_64.c +++ b/arch/x86/kernel/vsyscall_64.c @@ -216,7 +216,7 @@ bool emulate_vsyscall(struct pt_regs *regs, unsigned long address) */ regs->orig_ax = syscall_nr; regs->ax = -ENOSYS; - tmp = secure_computing(syscall_nr); + tmp = secure_computing(); if ((!tmp && regs->orig_ax != syscall_nr) || regs->ip != address) { warn_bad_vsyscall(KERN_DEBUG, regs, "seccomp tried to change syscall nr or ip"); diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 5d586a45a319..aa3c040230be 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -27,19 +27,17 @@ struct seccomp { struct seccomp_filter *filter; }; -extern int __secure_computing(int); -static inline int secure_computing(int this_syscall) +#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER +extern int __secure_computing(void); +static inline int secure_computing(void) { if (unlikely(test_thread_flag(TIF_SECCOMP))) - return __secure_computing(this_syscall); + return __secure_computing(); return 0; } - -/* A wrapper for architectures supporting only SECCOMP_MODE_STRICT. */ -static inline void secure_computing_strict(int this_syscall) -{ - BUG_ON(secure_computing(this_syscall) != 0); -} +#else +extern void secure_computing_strict(int this_syscall); +#endif extern long prctl_get_seccomp(void); extern long prctl_set_seccomp(unsigned long, char __user *); @@ -56,8 +54,11 @@ static inline int seccomp_mode(struct seccomp *s) struct seccomp { }; struct seccomp_filter { }; -static inline int secure_computing(int this_syscall) { return 0; } +#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER +static inline int secure_computing(void) { return 0; } +#else static inline void secure_computing_strict(int this_syscall) { return; } +#endif static inline long prctl_get_seccomp(void) { diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 44eb005c6695..5e738e0dd2e9 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -23,8 +23,11 @@ /* #define SECCOMP_DEBUG 1 */ -#ifdef CONFIG_SECCOMP_FILTER +#ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER #include +#endif + +#ifdef CONFIG_SECCOMP_FILTER #include #include #include @@ -172,7 +175,7 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) * * Returns valid seccomp BPF response codes. 
*/ -static u32 seccomp_run_filters(int syscall) +static u32 seccomp_run_filters(void) { struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter); struct seccomp_data sd; @@ -564,10 +567,43 @@ static int mode1_syscalls_32[] = { }; #endif -int __secure_computing(int this_syscall) +static void __secure_computing_strict(int this_syscall) +{ + int *syscall_whitelist = mode1_syscalls; +#ifdef CONFIG_COMPAT + if (is_compat_task()) + syscall_whitelist = mode1_syscalls_32; +#endif + do { + if (*syscall_whitelist == this_syscall) + return; + } while (*++syscall_whitelist); + +#ifdef SECCOMP_DEBUG + dump_stack(); +#endif + audit_seccomp(this_syscall, SIGKILL, SECCOMP_RET_KILL); + do_exit(SIGKILL); +} + +#ifndef CONFIG_HAVE_ARCH_SECCOMP_FILTER +void secure_computing_strict(int this_syscall) +{ + int mode = current->seccomp.mode; + + if (mode == 0) + return; + else if (mode == SECCOMP_MODE_STRICT) + __secure_computing_strict(this_syscall); + else + BUG(); +} +#else +int __secure_computing(void) { + struct pt_regs *regs = task_pt_regs(current); + int this_syscall = syscall_get_nr(current, regs); int exit_sig = 0; - int *syscall; u32 ret; /* @@ -578,23 +614,12 @@ int __secure_computing(int this_syscall) switch (current->seccomp.mode) { case SECCOMP_MODE_STRICT: - syscall = mode1_syscalls; -#ifdef CONFIG_COMPAT - if (is_compat_task()) - syscall = mode1_syscalls_32; -#endif - do { - if (*syscall == this_syscall) - return 0; - } while (*++syscall); - exit_sig = SIGKILL; - ret = SECCOMP_RET_KILL; - break; + __secure_computing_strict(this_syscall); + return 0; #ifdef CONFIG_SECCOMP_FILTER case SECCOMP_MODE_FILTER: { int data; - struct pt_regs *regs = task_pt_regs(current); - ret = seccomp_run_filters(this_syscall); + ret = seccomp_run_filters(); data = ret & SECCOMP_RET_DATA; ret &= SECCOMP_RET_ACTION; switch (ret) { @@ -652,9 +677,10 @@ int __secure_computing(int this_syscall) #ifdef CONFIG_SECCOMP_FILTER skip: audit_seccomp(this_syscall, exit_sig, ret); -#endif return -1; +#endif } +#endif /* CONFIG_HAVE_ARCH_SECCOMP_FILTER */ long prctl_get_seccomp(void) { -- cgit v1.2.3 From 13aa72f0fd0a9f98a41cefb662487269e2f1ad65 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 21 Jul 2014 18:49:15 -0700 Subject: seccomp: Refactor the filter callback and the API The reason I did this is to add a seccomp API that will be usable for an x86 fast path. The x86 entry code needs to use a rather expensive slow path for a syscall that might be visible to things like ptrace. By splitting seccomp into two phases, we can check whether we need the slow path and then use the fast path in if the filter allows the syscall or just returns some errno. As a side effect, I think the new code is much easier to understand than the old code. This has one user-visible effect: the audit record written for SECCOMP_RET_TRACE is now a simple indication that SECCOMP_RET_TRACE happened. It used to depend in a complicated way on what the tracer did. I couldn't make much sense of it. 
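The resulting contract is easiest to see from the caller's side. A sketch of what an arch fast path does with the two phases -- it mirrors the new __secure_computing() in the diff below, with only the wrapper name invented (a later patch in this series adds a seccomp_data argument to seccomp_phase1()):

    #include <linux/seccomp.h>

    /* Phase 1 is cheap and reads pt_regs only through the
     * syscall_xyz helpers; phase 2 is needed only for
     * SECCOMP_RET_TRACE, where ptrace hooks must be safe to run.
     */
    static long my_arch_seccomp_check(void)
    {
            u32 phase1 = seccomp_phase1();

            if (likely(phase1 == SECCOMP_PHASE1_OK))
                    return 0;	/* run the syscall normally */
            if (likely(phase1 == SECCOMP_PHASE1_SKIP))
                    return -1;	/* return value was set in phase 1 */

            return seccomp_phase2(phase1);	/* slow path: tracer work */
    }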
Signed-off-by: Andy Lutomirski Signed-off-by: Kees Cook --- include/linux/seccomp.h | 6 ++ kernel/seccomp.c | 190 +++++++++++++++++++++++++++++++----------------- 2 files changed, 130 insertions(+), 66 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index aa3c040230be..38851085e481 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -35,6 +35,12 @@ static inline int secure_computing(void) return __secure_computing(); return 0; } + +#define SECCOMP_PHASE1_OK 0 +#define SECCOMP_PHASE1_SKIP 1 + +extern u32 seccomp_phase1(void); +int seccomp_phase2(u32 phase1_result); #else extern void secure_computing_strict(int this_syscall); #endif diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 5e738e0dd2e9..6c8528ce9df9 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -21,8 +21,6 @@ #include #include -/* #define SECCOMP_DEBUG 1 */ - #ifdef CONFIG_HAVE_ARCH_SECCOMP_FILTER #include #endif @@ -601,10 +599,21 @@ void secure_computing_strict(int this_syscall) #else int __secure_computing(void) { - struct pt_regs *regs = task_pt_regs(current); - int this_syscall = syscall_get_nr(current, regs); - int exit_sig = 0; - u32 ret; + u32 phase1_result = seccomp_phase1(); + + if (likely(phase1_result == SECCOMP_PHASE1_OK)) + return 0; + else if (likely(phase1_result == SECCOMP_PHASE1_SKIP)) + return -1; + else + return seccomp_phase2(phase1_result); +} + +#ifdef CONFIG_SECCOMP_FILTER +static u32 __seccomp_phase1_filter(int this_syscall, struct pt_regs *regs) +{ + u32 filter_ret, action; + int data; /* * Make sure that any changes to mode from another thread have @@ -612,73 +621,122 @@ int __secure_computing(void) */ rmb(); - switch (current->seccomp.mode) { + filter_ret = seccomp_run_filters(); + data = filter_ret & SECCOMP_RET_DATA; + action = filter_ret & SECCOMP_RET_ACTION; + + switch (action) { + case SECCOMP_RET_ERRNO: + /* Set the low-order 16-bits as a errno. */ + syscall_set_return_value(current, regs, + -data, 0); + goto skip; + + case SECCOMP_RET_TRAP: + /* Show the handler the original registers. */ + syscall_rollback(current, regs); + /* Let the filter pass back 16 bits of data. */ + seccomp_send_sigsys(this_syscall, data); + goto skip; + + case SECCOMP_RET_TRACE: + return filter_ret; /* Save the rest for phase 2. */ + + case SECCOMP_RET_ALLOW: + return SECCOMP_PHASE1_OK; + + case SECCOMP_RET_KILL: + default: + audit_seccomp(this_syscall, SIGSYS, action); + do_exit(SIGSYS); + } + + unreachable(); + +skip: + audit_seccomp(this_syscall, 0, action); + return SECCOMP_PHASE1_SKIP; +} +#endif + +/** + * seccomp_phase1() - run fast path seccomp checks on the current syscall + * + * This only reads pt_regs via the syscall_xyz helpers. The only change + * it will make to pt_regs is via syscall_set_return_value, and it will + * only do that if it returns SECCOMP_PHASE1_SKIP. + * + * It may also call do_exit or force a signal; these actions must be + * safe. + * + * If it returns SECCOMP_PHASE1_OK, the syscall passes checks and should + * be processed normally. + * + * If it returns SECCOMP_PHASE1_SKIP, then the syscall should not be + * invoked. In this case, seccomp_phase1 will have set the return value + * using syscall_set_return_value. + * + * If it returns anything else, then the return value should be passed + * to seccomp_phase2 from a context in which ptrace hooks are safe. 
+ */ +u32 seccomp_phase1(void) +{ + int mode = current->seccomp.mode; + struct pt_regs *regs = task_pt_regs(current); + int this_syscall = syscall_get_nr(current, regs); + + switch (mode) { case SECCOMP_MODE_STRICT: - __secure_computing_strict(this_syscall); - return 0; + __secure_computing_strict(this_syscall); /* may call do_exit */ + return SECCOMP_PHASE1_OK; #ifdef CONFIG_SECCOMP_FILTER - case SECCOMP_MODE_FILTER: { - int data; - ret = seccomp_run_filters(); - data = ret & SECCOMP_RET_DATA; - ret &= SECCOMP_RET_ACTION; - switch (ret) { - case SECCOMP_RET_ERRNO: - /* Set the low-order 16-bits as a errno. */ - syscall_set_return_value(current, regs, - -data, 0); - goto skip; - case SECCOMP_RET_TRAP: - /* Show the handler the original registers. */ - syscall_rollback(current, regs); - /* Let the filter pass back 16 bits of data. */ - seccomp_send_sigsys(this_syscall, data); - goto skip; - case SECCOMP_RET_TRACE: - /* Skip these calls if there is no tracer. */ - if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) { - syscall_set_return_value(current, regs, - -ENOSYS, 0); - goto skip; - } - /* Allow the BPF to provide the event message */ - ptrace_event(PTRACE_EVENT_SECCOMP, data); - /* - * The delivery of a fatal signal during event - * notification may silently skip tracer notification. - * Terminating the task now avoids executing a system - * call that may not be intended. - */ - if (fatal_signal_pending(current)) - break; - if (syscall_get_nr(current, regs) < 0) - goto skip; /* Explicit request to skip. */ - - return 0; - case SECCOMP_RET_ALLOW: - return 0; - case SECCOMP_RET_KILL: - default: - break; - } - exit_sig = SIGSYS; - break; - } + case SECCOMP_MODE_FILTER: + return __seccomp_phase1_filter(this_syscall, regs); #endif default: BUG(); } +} -#ifdef SECCOMP_DEBUG - dump_stack(); -#endif - audit_seccomp(this_syscall, exit_sig, ret); - do_exit(exit_sig); -#ifdef CONFIG_SECCOMP_FILTER -skip: - audit_seccomp(this_syscall, exit_sig, ret); - return -1; -#endif +/** + * seccomp_phase2() - finish slow path seccomp work for the current syscall + * @phase1_result: The return value from seccomp_phase1() + * + * This must be called from a context in which ptrace hooks can be used. + * + * Returns 0 if the syscall should be processed or -1 to skip the syscall. + */ +int seccomp_phase2(u32 phase1_result) +{ + struct pt_regs *regs = task_pt_regs(current); + u32 action = phase1_result & SECCOMP_RET_ACTION; + int data = phase1_result & SECCOMP_RET_DATA; + + BUG_ON(action != SECCOMP_RET_TRACE); + + audit_seccomp(syscall_get_nr(current, regs), 0, action); + + /* Skip these calls if there is no tracer. */ + if (!ptrace_event_enabled(current, PTRACE_EVENT_SECCOMP)) { + syscall_set_return_value(current, regs, + -ENOSYS, 0); + return -1; + } + + /* Allow the BPF to provide the event message */ + ptrace_event(PTRACE_EVENT_SECCOMP, data); + /* + * The delivery of a fatal signal during event + * notification may silently skip tracer notification. + * Terminating the task now avoids executing a system + * call that may not be intended. + */ + if (fatal_signal_pending(current)) + do_exit(SIGSYS); + if (syscall_get_nr(current, regs) < 0) + return -1; /* Explicit request to skip. 
*/ + + return 0; } #endif /* CONFIG_HAVE_ARCH_SECCOMP_FILTER */ -- cgit v1.2.3 From d39bd00deabe57420f2a3669eb71b0e0c4997184 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 21 Jul 2014 18:49:16 -0700 Subject: seccomp: Allow arch code to provide seccomp_data populate_seccomp_data is expensive: it works by inspecting task_pt_regs and various other bits to piece together all the information, and it does so in multiple partially redundant steps. Arch-specific code in the syscall entry path can do much better. Admittedly this adds a bit of additional room for error, but the speedup should be worth it. Signed-off-by: Andy Lutomirski Signed-off-by: Kees Cook --- include/linux/seccomp.h | 2 +- kernel/seccomp.c | 32 +++++++++++++++++++------------- 2 files changed, 20 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index 38851085e481..a19ddacdac30 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -39,7 +39,7 @@ static inline int secure_computing(void) #define SECCOMP_PHASE1_OK 0 #define SECCOMP_PHASE1_SKIP 1 -extern u32 seccomp_phase1(void); +extern u32 seccomp_phase1(struct seccomp_data *sd); int seccomp_phase2(u32 phase1_result); #else extern void secure_computing_strict(int this_syscall); diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 6c8528ce9df9..1285cb205d49 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -173,10 +173,10 @@ static int seccomp_check_filter(struct sock_filter *filter, unsigned int flen) * * Returns valid seccomp BPF response codes. */ -static u32 seccomp_run_filters(void) +static u32 seccomp_run_filters(struct seccomp_data *sd) { struct seccomp_filter *f = ACCESS_ONCE(current->seccomp.filter); - struct seccomp_data sd; + struct seccomp_data sd_local; u32 ret = SECCOMP_RET_ALLOW; /* Ensure unexpected behavior doesn't result in failing open. */ @@ -186,14 +186,17 @@ static u32 seccomp_run_filters(void) /* Make sure cross-thread synced filter points somewhere sane. */ smp_read_barrier_depends(); - populate_seccomp_data(&sd); + if (!sd) { + populate_seccomp_data(&sd_local); + sd = &sd_local; + } /* * All filters in the list are evaluated and the lowest BPF return * value always takes priority (ignoring the DATA). */ for (; f; f = f->prev) { - u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)&sd); + u32 cur_ret = BPF_PROG_RUN(f->prog, (void *)sd); if ((cur_ret & SECCOMP_RET_ACTION) < (ret & SECCOMP_RET_ACTION)) ret = cur_ret; @@ -599,7 +602,7 @@ void secure_computing_strict(int this_syscall) #else int __secure_computing(void) { - u32 phase1_result = seccomp_phase1(); + u32 phase1_result = seccomp_phase1(NULL); if (likely(phase1_result == SECCOMP_PHASE1_OK)) return 0; @@ -610,7 +613,7 @@ int __secure_computing(void) } #ifdef CONFIG_SECCOMP_FILTER -static u32 __seccomp_phase1_filter(int this_syscall, struct pt_regs *regs) +static u32 __seccomp_phase1_filter(int this_syscall, struct seccomp_data *sd) { u32 filter_ret, action; int data; @@ -621,20 +624,20 @@ static u32 __seccomp_phase1_filter(int this_syscall, struct pt_regs *regs) */ rmb(); - filter_ret = seccomp_run_filters(); + filter_ret = seccomp_run_filters(sd); data = filter_ret & SECCOMP_RET_DATA; action = filter_ret & SECCOMP_RET_ACTION; switch (action) { case SECCOMP_RET_ERRNO: /* Set the low-order 16-bits as a errno. */ - syscall_set_return_value(current, regs, + syscall_set_return_value(current, task_pt_regs(current), -data, 0); goto skip; case SECCOMP_RET_TRAP: /* Show the handler the original registers.
*/ - syscall_rollback(current, regs); + syscall_rollback(current, task_pt_regs(current)); /* Let the filter pass back 16 bits of data. */ seccomp_send_sigsys(this_syscall, data); goto skip; @@ -661,11 +664,14 @@ skip: /** * seccomp_phase1() - run fast path seccomp checks on the current syscall + * @arg sd: The seccomp_data or NULL * * This only reads pt_regs via the syscall_xyz helpers. The only change * it will make to pt_regs is via syscall_set_return_value, and it will * only do that if it returns SECCOMP_PHASE1_SKIP. * + * If sd is provided, it will not read pt_regs at all. + * * It may also call do_exit or force a signal; these actions must be * safe. * @@ -679,11 +685,11 @@ skip: * If it returns anything else, then the return value should be passed * to seccomp_phase2 from a context in which ptrace hooks are safe. */ -u32 seccomp_phase1(void) +u32 seccomp_phase1(struct seccomp_data *sd) { int mode = current->seccomp.mode; - struct pt_regs *regs = task_pt_regs(current); - int this_syscall = syscall_get_nr(current, regs); + int this_syscall = sd ? sd->nr : + syscall_get_nr(current, task_pt_regs(current)); switch (mode) { case SECCOMP_MODE_STRICT: @@ -691,7 +697,7 @@ u32 seccomp_phase1(void) return SECCOMP_PHASE1_OK; #ifdef CONFIG_SECCOMP_FILTER case SECCOMP_MODE_FILTER: - return __seccomp_phase1_filter(this_syscall, regs); + return __seccomp_phase1_filter(this_syscall, sd); #endif default: BUG(); -- cgit v1.2.3 From 26ae4980b5e4739af93543a147facb421fb78ae8 Mon Sep 17 00:00:00 2001 From: Aaron Sierra Date: Fri, 15 Aug 2014 16:07:48 -0500 Subject: fsl_ifc: Fix csor_ext position in fsl_ifc_regs According to Freescale manuals, the IFC_CSORn_EXT register is located immediately _after_ the bank's IFC_CSORn register. This patch adjusts the csor_ext member of, and the reserved register arrays immediately surrounding, the csor_cs structure to provide proper access to this register. Signed-off-by: Aaron Sierra Acked-by: Prabhakar Kushwaha Signed-off-by: Scott Wood --- include/linux/fsl_ifc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsl_ifc.h b/include/linux/fsl_ifc.h index f49ddb1b2273..84d60cb841b1 100644 --- a/include/linux/fsl_ifc.h +++ b/include/linux/fsl_ifc.h @@ -781,13 +781,13 @@ struct fsl_ifc_regs { __be32 amask; u32 res4[0x2]; } amask_cs[FSL_IFC_BANK_COUNT]; - u32 res5[0x17]; + u32 res5[0x18]; struct { - __be32 csor_ext; __be32 csor; + __be32 csor_ext; u32 res6; } csor_cs[FSL_IFC_BANK_COUNT]; - u32 res7[0x19]; + u32 res7[0x18]; struct { __be32 ftim[4]; u32 res8[0x8]; -- cgit v1.2.3 From 940001762ac514810e305aab356983829e5fa82a Mon Sep 17 00:00:00 2001 From: Ying Xue Date: Wed, 3 Sep 2014 09:22:36 +0800 Subject: lib/rhashtable: allow user to set the minimum shifts of shrinking Although the rhashtable library allows the user to specify a quite big size for the created hash table, the table may be shrunk to a very small size - HASH_MIN_SIZE(4) - the first time an object is removed from it. Subsequently, even if the total number of objects stored in the table stays well below the user's initial setting for a long time, the hash table size is still dynamically adjusted by rhashtable_shrink() or rhashtable_expand() each time an object is inserted into or removed from the table.
However, as synchronize_rcu() has to be called whenever the table is shrunk or expanded by these two functions, the user should be permitted to set a minimum table size, by configuring a minimum number of shifts to match their specific requirements, thereby avoiding these expensive shrink and expand operations and their synchronize_rcu() calls. Signed-off-by: Ying Xue Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 2 ++ lib/rhashtable.c | 12 ++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 36826c0166c5..fb298e9d6d3a 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -44,6 +44,7 @@ struct rhashtable; * @head_offset: Offset of rhash_head in struct to be hashed * @hash_rnd: Seed to use while hashing * @max_shift: Maximum number of shifts while expanding + * @min_shift: Minimum number of shifts while shrinking * @hashfn: Function to hash key * @obj_hashfn: Function to hash object * @grow_decision: If defined, may return true if table should expand @@ -57,6 +58,7 @@ struct rhashtable_params { size_t head_offset; u32 hash_rnd; size_t max_shift; + size_t min_shift; rht_hashfn_t hashfn; rht_obj_hashfn_t obj_hashfn; bool (*grow_decision)(const struct rhashtable *ht, diff --git a/lib/rhashtable.c b/lib/rhashtable.c index a2c78810ebc1..8dfec3f26d4c 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -298,7 +298,7 @@ int rhashtable_shrink(struct rhashtable *ht, gfp_t flags) ASSERT_RHT_MUTEX(ht); - if (tbl->size <= HASH_MIN_SIZE) + if (ht->shift <= ht->p.min_shift) return 0; ntbl = bucket_table_alloc(tbl->size / 2, flags); @@ -506,9 +506,10 @@ void *rhashtable_lookup_compare(const struct rhashtable *ht, u32 hash, } EXPORT_SYMBOL_GPL(rhashtable_lookup_compare); -static size_t rounded_hashtable_size(unsigned int nelem) +static size_t rounded_hashtable_size(struct rhashtable_params *params) { - return max(roundup_pow_of_two(nelem * 4 / 3), HASH_MIN_SIZE); + return max(roundup_pow_of_two(params->nelem_hint * 4 / 3), + 1UL << params->min_shift); } /** @@ -566,8 +567,11 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) (!params->key_len && !params->obj_hashfn)) return -EINVAL; + params->min_shift = max_t(size_t, params->min_shift, + ilog2(HASH_MIN_SIZE)); + if (params->nelem_hint) - size = rounded_hashtable_size(params->nelem_hint); + size = rounded_hashtable_size(params); tbl = bucket_table_alloc(size, GFP_KERNEL); if (tbl == NULL) -- cgit v1.2.3 From 03e9f0cac5da6af85758276cb4624caf5911f2b9 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 3 Sep 2014 13:02:56 +0200 Subject: pinctrl: clean up after enable refactoring commit 2243a87d90b42eb38bc281957df3e57c712b5e56 "pinctrl: avoid duplicated calling enable_pinmux_setting for a pin" removed the .disable callback from the struct pinmux_ops, making the .enable() callback the only remaining callback. However, .enable() is a bad name, as it seems to imply that a muxing can also be disabled. Rename the callback to .set_mux() and also take this opportunity to clean out any remaining mentions of .disable() from the documentation.
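Before the long run of per-driver diffs, a minimal sketch of what a converted driver looks like. This is a hypothetical "foo" controller written for illustration only (the foo_* helpers and the FOO_MUX() register macro are invented); it shows that the callback body and signature are unchanged and only the operation name moves:

static int foo_set_mux(struct pinctrl_dev *pctldev, unsigned func_selector,
		       unsigned group_selector)
{
	struct foo_pmx *pmx = pinctrl_dev_get_drvdata(pctldev);

	/* Program the selected function onto the selected pin group. */
	writel(func_selector, pmx->base + FOO_MUX(group_selector));
	return 0;
}

static const struct pinmux_ops foo_pmx_ops = {
	.get_functions_count	= foo_get_functions_count,
	.get_function_name	= foo_get_function_name,
	.get_function_groups	= foo_get_function_groups,
	.set_mux		= foo_set_mux,	/* formerly .enable */
};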
Acked-by: Stephen Warren Acked-by: Bjorn Andersson Acked-by: Fan Wu Signed-off-by: Linus Walleij --- Documentation/pinctrl.txt | 14 ++------------ drivers/pinctrl/berlin/berlin.c | 8 ++++---- drivers/pinctrl/mvebu/pinctrl-mvebu.c | 6 +++--- drivers/pinctrl/nomadik/pinctrl-abx500.c | 6 +++--- drivers/pinctrl/nomadik/pinctrl-nomadik.c | 6 +++--- drivers/pinctrl/pinctrl-adi2.c | 6 +++--- drivers/pinctrl/pinctrl-as3722.c | 4 ++-- drivers/pinctrl/pinctrl-at91.c | 6 +++--- drivers/pinctrl/pinctrl-bcm281xx.c | 8 ++++---- drivers/pinctrl/pinctrl-bcm2835.c | 4 ++-- drivers/pinctrl/pinctrl-imx.c | 6 +++--- drivers/pinctrl/pinctrl-imx1-core.c | 6 +++--- drivers/pinctrl/pinctrl-lantiq.c | 8 ++++---- drivers/pinctrl/pinctrl-mxs.c | 6 +++--- drivers/pinctrl/pinctrl-palmas.c | 5 +++-- drivers/pinctrl/pinctrl-rockchip.c | 6 +++--- drivers/pinctrl/pinctrl-single.c | 4 ++-- drivers/pinctrl/pinctrl-st.c | 6 +++--- drivers/pinctrl/pinctrl-tb10x.c | 4 ++-- drivers/pinctrl/pinctrl-tegra-xusb.c | 8 ++++---- drivers/pinctrl/pinctrl-tegra.c | 7 ++++--- drivers/pinctrl/pinctrl-tz1090-pdc.c | 7 ++++--- drivers/pinctrl/pinctrl-tz1090.c | 6 +++--- drivers/pinctrl/pinctrl-u300.c | 6 +++--- drivers/pinctrl/pinmux.c | 10 +++++----- drivers/pinctrl/qcom/pinctrl-msm.c | 8 ++++---- drivers/pinctrl/samsung/pinctrl-exynos5440.c | 7 ++++--- drivers/pinctrl/samsung/pinctrl-samsung.c | 7 ++++--- drivers/pinctrl/sh-pfc/pinctrl.c | 6 +++--- drivers/pinctrl/sirf/pinctrl-sirf.c | 7 ++++--- drivers/pinctrl/spear/pinctrl-spear.c | 4 ++-- drivers/pinctrl/sunxi/pinctrl-sunxi.c | 8 ++++---- drivers/pinctrl/vt8500/pinctrl-wmt.c | 8 ++++---- include/linux/pinctrl/pinmux.h | 7 +++---- 34 files changed, 110 insertions(+), 115 deletions(-) (limited to 'include/linux') diff --git a/Documentation/pinctrl.txt b/Documentation/pinctrl.txt index 23f1590f49fe..b8f2147b96dd 100644 --- a/Documentation/pinctrl.txt +++ b/Documentation/pinctrl.txt @@ -702,7 +702,7 @@ static int foo_get_groups(struct pinctrl_dev *pctldev, unsigned selector, return 0; } -int foo_enable(struct pinctrl_dev *pctldev, unsigned selector, +int foo_set_mux(struct pinctrl_dev *pctldev, unsigned selector, unsigned group) { u8 regbit = (1 << selector + group); @@ -711,21 +711,11 @@ int foo_enable(struct pinctrl_dev *pctldev, unsigned selector, return 0; } -void foo_disable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) -{ - u8 regbit = (1 << selector + group); - - writeb((readb(MUX) & ~(regbit)), MUX) - return 0; -} - struct pinmux_ops foo_pmxops = { .get_functions_count = foo_get_functions_count, .get_function_name = foo_get_fname, .get_function_groups = foo_get_groups, - .enable = foo_enable, - .disable = foo_disable, + .set_mux = foo_set_mux, }; /* Pinmux operations are handled by some pin controller */ diff --git a/drivers/pinctrl/berlin/berlin.c b/drivers/pinctrl/berlin/berlin.c index 86db2235ab00..61f1bf0e60ba 100644 --- a/drivers/pinctrl/berlin/berlin.c +++ b/drivers/pinctrl/berlin/berlin.c @@ -170,9 +170,9 @@ berlin_pinctrl_find_function_by_name(struct berlin_pinctrl *pctrl, return NULL; } -static int berlin_pinmux_enable(struct pinctrl_dev *pctrl_dev, - unsigned function, - unsigned group) +static int berlin_pinmux_set(struct pinctrl_dev *pctrl_dev, + unsigned function, + unsigned group) { struct berlin_pinctrl *pctrl = pinctrl_dev_get_drvdata(pctrl_dev); const struct berlin_desc_group *group_desc = pctrl->desc->groups + group; @@ -197,7 +197,7 @@ static const struct pinmux_ops berlin_pinmux_ops = { .get_functions_count = 
&berlin_pinmux_get_functions_count, .get_function_name = &berlin_pinmux_get_function_name, .get_function_groups = &berlin_pinmux_get_function_groups, - .enable = &berlin_pinmux_enable, + .set_mux = &berlin_pinmux_set, }; static int berlin_pinctrl_add_function(struct berlin_pinctrl *pctrl, diff --git a/drivers/pinctrl/mvebu/pinctrl-mvebu.c b/drivers/pinctrl/mvebu/pinctrl-mvebu.c index 9908374f8f92..f3b426cdaf8f 100644 --- a/drivers/pinctrl/mvebu/pinctrl-mvebu.c +++ b/drivers/pinctrl/mvebu/pinctrl-mvebu.c @@ -259,8 +259,8 @@ static int mvebu_pinmux_get_groups(struct pinctrl_dev *pctldev, unsigned fid, return 0; } -static int mvebu_pinmux_enable(struct pinctrl_dev *pctldev, unsigned fid, - unsigned gid) +static int mvebu_pinmux_set(struct pinctrl_dev *pctldev, unsigned fid, + unsigned gid) { struct mvebu_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev); struct mvebu_pinctrl_function *func = &pctl->functions[fid]; @@ -344,7 +344,7 @@ static const struct pinmux_ops mvebu_pinmux_ops = { .get_function_groups = mvebu_pinmux_get_groups, .gpio_request_enable = mvebu_pinmux_gpio_request_enable, .gpio_set_direction = mvebu_pinmux_gpio_set_direction, - .enable = mvebu_pinmux_enable, + .set_mux = mvebu_pinmux_set, }; static int mvebu_pinctrl_get_groups_count(struct pinctrl_dev *pctldev) diff --git a/drivers/pinctrl/nomadik/pinctrl-abx500.c b/drivers/pinctrl/nomadik/pinctrl-abx500.c index a53a689a2bfa..b0289824cf73 100644 --- a/drivers/pinctrl/nomadik/pinctrl-abx500.c +++ b/drivers/pinctrl/nomadik/pinctrl-abx500.c @@ -709,8 +709,8 @@ static int abx500_pmx_get_func_groups(struct pinctrl_dev *pctldev, return 0; } -static int abx500_pmx_enable(struct pinctrl_dev *pctldev, unsigned function, - unsigned group) +static int abx500_pmx_set(struct pinctrl_dev *pctldev, unsigned function, + unsigned group) { struct abx500_pinctrl *pct = pinctrl_dev_get_drvdata(pctldev); struct gpio_chip *chip = &pct->chip; @@ -784,7 +784,7 @@ static const struct pinmux_ops abx500_pinmux_ops = { .get_functions_count = abx500_pmx_get_funcs_cnt, .get_function_name = abx500_pmx_get_func_name, .get_function_groups = abx500_pmx_get_func_groups, - .enable = abx500_pmx_enable, + .set_mux = abx500_pmx_set, .gpio_request_enable = abx500_gpio_request_enable, .gpio_disable_free = abx500_gpio_disable_free, }; diff --git a/drivers/pinctrl/nomadik/pinctrl-nomadik.c b/drivers/pinctrl/nomadik/pinctrl-nomadik.c index e7cab07eef47..c093d75a022a 100644 --- a/drivers/pinctrl/nomadik/pinctrl-nomadik.c +++ b/drivers/pinctrl/nomadik/pinctrl-nomadik.c @@ -1647,8 +1647,8 @@ static int nmk_pmx_get_func_groups(struct pinctrl_dev *pctldev, return 0; } -static int nmk_pmx_enable(struct pinctrl_dev *pctldev, unsigned function, - unsigned group) +static int nmk_pmx_set(struct pinctrl_dev *pctldev, unsigned function, + unsigned group) { struct nmk_pinctrl *npct = pinctrl_dev_get_drvdata(pctldev); const struct nmk_pingroup *g; @@ -1810,7 +1810,7 @@ static const struct pinmux_ops nmk_pinmux_ops = { .get_functions_count = nmk_pmx_get_funcs_cnt, .get_function_name = nmk_pmx_get_func_name, .get_function_groups = nmk_pmx_get_func_groups, - .enable = nmk_pmx_enable, + .set_mux = nmk_pmx_set, .gpio_request_enable = nmk_gpio_request_enable, .gpio_disable_free = nmk_gpio_disable_free, }; diff --git a/drivers/pinctrl/pinctrl-adi2.c b/drivers/pinctrl/pinctrl-adi2.c index b092b93c67a1..e02943b0285d 100644 --- a/drivers/pinctrl/pinctrl-adi2.c +++ b/drivers/pinctrl/pinctrl-adi2.c @@ -619,8 +619,8 @@ static struct pinctrl_ops adi_pctrl_ops = { .get_group_pins = 
adi_get_group_pins, }; -static int adi_pinmux_enable(struct pinctrl_dev *pctldev, unsigned func_id, - unsigned group_id) +static int adi_pinmux_set(struct pinctrl_dev *pctldev, unsigned func_id, + unsigned group_id) { struct adi_pinctrl *pinctrl = pinctrl_dev_get_drvdata(pctldev); struct gpio_port *port; @@ -698,7 +698,7 @@ static int adi_pinmux_request_gpio(struct pinctrl_dev *pctldev, } static struct pinmux_ops adi_pinmux_ops = { - .enable = adi_pinmux_enable, + .set_mux = adi_pinmux_set, .get_functions_count = adi_pinmux_get_funcs_count, .get_function_name = adi_pinmux_get_func_name, .get_function_groups = adi_pinmux_get_groups, diff --git a/drivers/pinctrl/pinctrl-as3722.c b/drivers/pinctrl/pinctrl-as3722.c index 0e4ec91f4d49..1f790a4b83fe 100644 --- a/drivers/pinctrl/pinctrl-as3722.c +++ b/drivers/pinctrl/pinctrl-as3722.c @@ -230,7 +230,7 @@ static int as3722_pinctrl_get_func_groups(struct pinctrl_dev *pctldev, return 0; } -static int as3722_pinctrl_enable(struct pinctrl_dev *pctldev, unsigned function, +static int as3722_pinctrl_set(struct pinctrl_dev *pctldev, unsigned function, unsigned group) { struct as3722_pctrl_info *as_pci = pinctrl_dev_get_drvdata(pctldev); @@ -327,7 +327,7 @@ static const struct pinmux_ops as3722_pinmux_ops = { .get_functions_count = as3722_pinctrl_get_funcs_count, .get_function_name = as3722_pinctrl_get_func_name, .get_function_groups = as3722_pinctrl_get_func_groups, - .enable = as3722_pinctrl_enable, + .set_mux = as3722_pinctrl_set, .gpio_request_enable = as3722_pinctrl_gpio_request_enable, .gpio_set_direction = as3722_pinctrl_gpio_set_direction, }; diff --git a/drivers/pinctrl/pinctrl-at91.c b/drivers/pinctrl/pinctrl-at91.c index af1ba4fc150d..4839c36a3a1c 100644 --- a/drivers/pinctrl/pinctrl-at91.c +++ b/drivers/pinctrl/pinctrl-at91.c @@ -554,8 +554,8 @@ static void at91_mux_gpio_enable(void __iomem *pio, unsigned mask, bool input) writel_relaxed(mask, pio + (input ? 
PIO_ODR : PIO_OER)); } -static int at91_pmx_enable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) +static int at91_pmx_set(struct pinctrl_dev *pctldev, unsigned selector, + unsigned group) { struct at91_pinctrl *info = pinctrl_dev_get_drvdata(pctldev); const struct at91_pmx_pin *pins_conf = info->groups[group].pins_conf; @@ -684,7 +684,7 @@ static const struct pinmux_ops at91_pmx_ops = { .get_functions_count = at91_pmx_get_funcs_count, .get_function_name = at91_pmx_get_func_name, .get_function_groups = at91_pmx_get_groups, - .enable = at91_pmx_enable, + .set_mux = at91_pmx_set, .gpio_request_enable = at91_gpio_request_enable, .gpio_disable_free = at91_gpio_disable_free, }; diff --git a/drivers/pinctrl/pinctrl-bcm281xx.c b/drivers/pinctrl/pinctrl-bcm281xx.c index c5ca9e633fff..a26e0c2ba33e 100644 --- a/drivers/pinctrl/pinctrl-bcm281xx.c +++ b/drivers/pinctrl/pinctrl-bcm281xx.c @@ -1055,9 +1055,9 @@ static int bcm281xx_pinctrl_get_fcn_groups(struct pinctrl_dev *pctldev, return 0; } -static int bcm281xx_pinmux_enable(struct pinctrl_dev *pctldev, - unsigned function, - unsigned group) +static int bcm281xx_pinmux_set(struct pinctrl_dev *pctldev, + unsigned function, + unsigned group) { struct bcm281xx_pinctrl_data *pdata = pinctrl_dev_get_drvdata(pctldev); const struct bcm281xx_pin_function *f = &pdata->functions[function]; @@ -1084,7 +1084,7 @@ static struct pinmux_ops bcm281xx_pinctrl_pinmux_ops = { .get_functions_count = bcm281xx_pinctrl_get_fcns_count, .get_function_name = bcm281xx_pinctrl_get_fcn_name, .get_function_groups = bcm281xx_pinctrl_get_fcn_groups, - .enable = bcm281xx_pinmux_enable, + .set_mux = bcm281xx_pinmux_set, }; static int bcm281xx_pinctrl_pin_config_get(struct pinctrl_dev *pctldev, diff --git a/drivers/pinctrl/pinctrl-bcm2835.c b/drivers/pinctrl/pinctrl-bcm2835.c index 5bcfd7ace0cd..eabba02f71f9 100644 --- a/drivers/pinctrl/pinctrl-bcm2835.c +++ b/drivers/pinctrl/pinctrl-bcm2835.c @@ -830,7 +830,7 @@ static int bcm2835_pmx_get_function_groups(struct pinctrl_dev *pctldev, return 0; } -static int bcm2835_pmx_enable(struct pinctrl_dev *pctldev, +static int bcm2835_pmx_set(struct pinctrl_dev *pctldev, unsigned func_selector, unsigned group_selector) { @@ -869,7 +869,7 @@ static const struct pinmux_ops bcm2835_pmx_ops = { .get_functions_count = bcm2835_pmx_get_functions_count, .get_function_name = bcm2835_pmx_get_function_name, .get_function_groups = bcm2835_pmx_get_function_groups, - .enable = bcm2835_pmx_enable, + .set_mux = bcm2835_pmx_set, .gpio_disable_free = bcm2835_pmx_gpio_disable_free, .gpio_set_direction = bcm2835_pmx_gpio_set_direction, }; diff --git a/drivers/pinctrl/pinctrl-imx.c b/drivers/pinctrl/pinctrl-imx.c index 946d594a64dd..570e5acb4a6a 100644 --- a/drivers/pinctrl/pinctrl-imx.c +++ b/drivers/pinctrl/pinctrl-imx.c @@ -179,8 +179,8 @@ static const struct pinctrl_ops imx_pctrl_ops = { }; -static int imx_pmx_enable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) +static int imx_pmx_set(struct pinctrl_dev *pctldev, unsigned selector, + unsigned group) { struct imx_pinctrl *ipctl = pinctrl_dev_get_drvdata(pctldev); const struct imx_pinctrl_soc_info *info = ipctl->info; @@ -298,7 +298,7 @@ static const struct pinmux_ops imx_pmx_ops = { .get_functions_count = imx_pmx_get_funcs_count, .get_function_name = imx_pmx_get_func_name, .get_function_groups = imx_pmx_get_groups, - .enable = imx_pmx_enable, + .set_mux = imx_pmx_set, }; static int imx_pinconf_get(struct pinctrl_dev *pctldev, diff --git a/drivers/pinctrl/pinctrl-imx1-core.c 
b/drivers/pinctrl/pinctrl-imx1-core.c index 483420757c9f..176a3e62f1cf 100644 --- a/drivers/pinctrl/pinctrl-imx1-core.c +++ b/drivers/pinctrl/pinctrl-imx1-core.c @@ -298,8 +298,8 @@ static const struct pinctrl_ops imx1_pctrl_ops = { }; -static int imx1_pmx_enable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) +static int imx1_pmx_set(struct pinctrl_dev *pctldev, unsigned selector, + unsigned group) { struct imx1_pinctrl *ipctl = pinctrl_dev_get_drvdata(pctldev); const struct imx1_pinctrl_soc_info *info = ipctl->info; @@ -385,7 +385,7 @@ static const struct pinmux_ops imx1_pmx_ops = { .get_functions_count = imx1_pmx_get_funcs_count, .get_function_name = imx1_pmx_get_func_name, .get_function_groups = imx1_pmx_get_groups, - .enable = imx1_pmx_enable, + .set_mux = imx1_pmx_set, }; static int imx1_pinconf_get(struct pinctrl_dev *pctldev, diff --git a/drivers/pinctrl/pinctrl-lantiq.c b/drivers/pinctrl/pinctrl-lantiq.c index d22ca252b80d..296e5b37f768 100644 --- a/drivers/pinctrl/pinctrl-lantiq.c +++ b/drivers/pinctrl/pinctrl-lantiq.c @@ -257,9 +257,9 @@ static int match_group_mux(const struct ltq_pin_group *grp, return ret; } -static int ltq_pmx_enable(struct pinctrl_dev *pctrldev, - unsigned func, - unsigned group) +static int ltq_pmx_set(struct pinctrl_dev *pctrldev, + unsigned func, + unsigned group) { struct ltq_pinmux_info *info = pinctrl_dev_get_drvdata(pctrldev); const struct ltq_pin_group *pin_grp = &info->grps[group]; @@ -316,7 +316,7 @@ static const struct pinmux_ops ltq_pmx_ops = { .get_functions_count = ltq_pmx_func_count, .get_function_name = ltq_pmx_func_name, .get_function_groups = ltq_pmx_get_groups, - .enable = ltq_pmx_enable, + .set_mux = ltq_pmx_set, .gpio_request_enable = ltq_pmx_gpio_request_enable, }; diff --git a/drivers/pinctrl/pinctrl-mxs.c b/drivers/pinctrl/pinctrl-mxs.c index 40c76f26998c..67035091f8fd 100644 --- a/drivers/pinctrl/pinctrl-mxs.c +++ b/drivers/pinctrl/pinctrl-mxs.c @@ -195,8 +195,8 @@ static int mxs_pinctrl_get_func_groups(struct pinctrl_dev *pctldev, return 0; } -static int mxs_pinctrl_enable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) +static int mxs_pinctrl_set_mux(struct pinctrl_dev *pctldev, unsigned selector, + unsigned group) { struct mxs_pinctrl_data *d = pinctrl_dev_get_drvdata(pctldev); struct mxs_group *g = &d->soc->groups[group]; @@ -223,7 +223,7 @@ static const struct pinmux_ops mxs_pinmux_ops = { .get_functions_count = mxs_pinctrl_get_funcs_count, .get_function_name = mxs_pinctrl_get_func_name, .get_function_groups = mxs_pinctrl_get_func_groups, - .enable = mxs_pinctrl_enable, + .set_mux = mxs_pinctrl_set_mux, }; static int mxs_pinconf_get(struct pinctrl_dev *pctldev, diff --git a/drivers/pinctrl/pinctrl-palmas.c b/drivers/pinctrl/pinctrl-palmas.c index f13d0e78a41c..e3079d3d19fe 100644 --- a/drivers/pinctrl/pinctrl-palmas.c +++ b/drivers/pinctrl/pinctrl-palmas.c @@ -685,7 +685,8 @@ static int palmas_pinctrl_get_func_groups(struct pinctrl_dev *pctldev, return 0; } -static int palmas_pinctrl_enable(struct pinctrl_dev *pctldev, unsigned function, +static int palmas_pinctrl_set_mux(struct pinctrl_dev *pctldev, + unsigned function, unsigned group) { struct palmas_pctrl_chip_info *pci = pinctrl_dev_get_drvdata(pctldev); @@ -742,7 +743,7 @@ static const struct pinmux_ops palmas_pinmux_ops = { .get_functions_count = palmas_pinctrl_get_funcs_count, .get_function_name = palmas_pinctrl_get_func_name, .get_function_groups = palmas_pinctrl_get_func_groups, - .enable = palmas_pinctrl_enable, + .set_mux = 
palmas_pinctrl_set_mux, }; static int palmas_pinconf_get(struct pinctrl_dev *pctldev, diff --git a/drivers/pinctrl/pinctrl-rockchip.c b/drivers/pinctrl/pinctrl-rockchip.c index 5e8b2e04cd7a..b91b966add8f 100644 --- a/drivers/pinctrl/pinctrl-rockchip.c +++ b/drivers/pinctrl/pinctrl-rockchip.c @@ -813,8 +813,8 @@ static int rockchip_pmx_get_groups(struct pinctrl_dev *pctldev, return 0; } -static int rockchip_pmx_enable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) +static int rockchip_pmx_set(struct pinctrl_dev *pctldev, unsigned selector, + unsigned group) { struct rockchip_pinctrl *info = pinctrl_dev_get_drvdata(pctldev); const unsigned int *pins = info->groups[group].pins; @@ -889,7 +889,7 @@ static const struct pinmux_ops rockchip_pmx_ops = { .get_functions_count = rockchip_pmx_get_funcs_count, .get_function_name = rockchip_pmx_get_func_name, .get_function_groups = rockchip_pmx_get_groups, - .enable = rockchip_pmx_enable, + .set_mux = rockchip_pmx_set, .gpio_set_direction = rockchip_pmx_gpio_set_direction, }; diff --git a/drivers/pinctrl/pinctrl-single.c b/drivers/pinctrl/pinctrl-single.c index 784de13facf3..9032422ad18f 100644 --- a/drivers/pinctrl/pinctrl-single.c +++ b/drivers/pinctrl/pinctrl-single.c @@ -447,7 +447,7 @@ static int pcs_get_function(struct pinctrl_dev *pctldev, unsigned pin, return 0; } -static int pcs_enable(struct pinctrl_dev *pctldev, unsigned fselector, +static int pcs_set_mux(struct pinctrl_dev *pctldev, unsigned fselector, unsigned group) { struct pcs_device *pcs; @@ -519,7 +519,7 @@ static const struct pinmux_ops pcs_pinmux_ops = { .get_functions_count = pcs_get_functions_count, .get_function_name = pcs_get_function_name, .get_function_groups = pcs_get_function_groups, - .enable = pcs_enable, + .set_mux = pcs_set_mux, .gpio_request_enable = pcs_request_gpio, }; diff --git a/drivers/pinctrl/pinctrl-st.c b/drivers/pinctrl/pinctrl-st.c index 5475374d803f..6c4c41bed1e3 100644 --- a/drivers/pinctrl/pinctrl-st.c +++ b/drivers/pinctrl/pinctrl-st.c @@ -914,8 +914,8 @@ static struct st_pio_control *st_get_pio_control( return &bank->pc; } -static int st_pmx_enable(struct pinctrl_dev *pctldev, unsigned fselector, - unsigned group) +static int st_pmx_set_mux(struct pinctrl_dev *pctldev, unsigned fselector, + unsigned group) { struct st_pinctrl *info = pinctrl_dev_get_drvdata(pctldev); struct st_pinconf *conf = info->groups[group].pin_conf; @@ -951,7 +951,7 @@ static struct pinmux_ops st_pmxops = { .get_functions_count = st_pmx_get_funcs_count, .get_function_name = st_pmx_get_fname, .get_function_groups = st_pmx_get_groups, - .enable = st_pmx_enable, + .set_mux = st_pmx_set_mux, .gpio_set_direction = st_pmx_set_gpio_direction, }; diff --git a/drivers/pinctrl/pinctrl-tb10x.c b/drivers/pinctrl/pinctrl-tb10x.c index 71c5d4f0c538..3b9bfcf717ac 100644 --- a/drivers/pinctrl/pinctrl-tb10x.c +++ b/drivers/pinctrl/pinctrl-tb10x.c @@ -697,7 +697,7 @@ static void tb10x_gpio_disable_free(struct pinctrl_dev *pctl, mutex_unlock(&state->mutex); } -static int tb10x_pctl_enable(struct pinctrl_dev *pctl, +static int tb10x_pctl_set_mux(struct pinctrl_dev *pctl, unsigned func_selector, unsigned group_selector) { struct tb10x_pinctrl *state = pinctrl_dev_get_drvdata(pctl); @@ -744,7 +744,7 @@ static struct pinmux_ops tb10x_pinmux_ops = { .get_function_groups = tb10x_get_function_groups, .gpio_request_enable = tb10x_gpio_request_enable, .gpio_disable_free = tb10x_gpio_disable_free, - .enable = tb10x_pctl_enable, + .set_mux = tb10x_pctl_set_mux, }; static struct pinctrl_desc
tb10x_pindesc = { diff --git a/drivers/pinctrl/pinctrl-tegra-xusb.c b/drivers/pinctrl/pinctrl-tegra-xusb.c index a06620474845..4648c9e5c582 100644 --- a/drivers/pinctrl/pinctrl-tegra-xusb.c +++ b/drivers/pinctrl/pinctrl-tegra-xusb.c @@ -281,9 +281,9 @@ static int tegra_xusb_padctl_get_function_groups(struct pinctrl_dev *pinctrl, return 0; } -static int tegra_xusb_padctl_pinmux_enable(struct pinctrl_dev *pinctrl, - unsigned int function, - unsigned int group) +static int tegra_xusb_padctl_pinmux_set(struct pinctrl_dev *pinctrl, + unsigned int function, + unsigned int group) { struct tegra_xusb_padctl *padctl = pinctrl_dev_get_drvdata(pinctrl); const struct tegra_xusb_padctl_lane *lane; @@ -311,7 +311,7 @@ static const struct pinmux_ops tegra_xusb_padctl_pinmux_ops = { .get_functions_count = tegra_xusb_padctl_get_functions_count, .get_function_name = tegra_xusb_padctl_get_function_name, .get_function_groups = tegra_xusb_padctl_get_function_groups, - .enable = tegra_xusb_padctl_pinmux_enable, + .set_mux = tegra_xusb_padctl_pinmux_set, }; static int tegra_xusb_padctl_pinconf_group_get(struct pinctrl_dev *pinctrl, diff --git a/drivers/pinctrl/pinctrl-tegra.c b/drivers/pinctrl/pinctrl-tegra.c index 150af5503c09..e5949d51bc52 100644 --- a/drivers/pinctrl/pinctrl-tegra.c +++ b/drivers/pinctrl/pinctrl-tegra.c @@ -262,8 +262,9 @@ static int tegra_pinctrl_get_func_groups(struct pinctrl_dev *pctldev, return 0; } -static int tegra_pinctrl_enable(struct pinctrl_dev *pctldev, unsigned function, - unsigned group) +static int tegra_pinctrl_set_mux(struct pinctrl_dev *pctldev, + unsigned function, + unsigned group) { struct tegra_pmx *pmx = pinctrl_dev_get_drvdata(pctldev); const struct tegra_pingroup *g; @@ -294,7 +295,7 @@ static const struct pinmux_ops tegra_pinmux_ops = { .get_functions_count = tegra_pinctrl_get_funcs_count, .get_function_name = tegra_pinctrl_get_func_name, .get_function_groups = tegra_pinctrl_get_func_groups, - .enable = tegra_pinctrl_enable, + .set_mux = tegra_pinctrl_set_mux, }; static int tegra_pinconf_reg(struct tegra_pmx *pmx, diff --git a/drivers/pinctrl/pinctrl-tz1090-pdc.c b/drivers/pinctrl/pinctrl-tz1090-pdc.c index 41e81a35cabb..3bb6a3b78864 100644 --- a/drivers/pinctrl/pinctrl-tz1090-pdc.c +++ b/drivers/pinctrl/pinctrl-tz1090-pdc.c @@ -547,8 +547,9 @@ static void tz1090_pdc_pinctrl_mux(struct tz1090_pdc_pmx *pmx, __global_unlock2(flags); } -static int tz1090_pdc_pinctrl_enable(struct pinctrl_dev *pctldev, - unsigned int function, unsigned int group) +static int tz1090_pdc_pinctrl_set_mux(struct pinctrl_dev *pctldev, + unsigned int function, + unsigned int group) { struct tz1090_pdc_pmx *pmx = pinctrl_dev_get_drvdata(pctldev); const struct tz1090_pdc_pingroup *grp = &tz1090_pdc_groups[group]; @@ -634,7 +635,7 @@ static struct pinmux_ops tz1090_pdc_pinmux_ops = { .get_functions_count = tz1090_pdc_pinctrl_get_funcs_count, .get_function_name = tz1090_pdc_pinctrl_get_func_name, .get_function_groups = tz1090_pdc_pinctrl_get_func_groups, - .enable = tz1090_pdc_pinctrl_enable, + .set_mux = tz1090_pdc_pinctrl_set_mux, .gpio_request_enable = tz1090_pdc_pinctrl_gpio_request_enable, .gpio_disable_free = tz1090_pdc_pinctrl_gpio_disable_free, }; diff --git a/drivers/pinctrl/pinctrl-tz1090.c b/drivers/pinctrl/pinctrl-tz1090.c index 24082216842e..48d36413b99f 100644 --- a/drivers/pinctrl/pinctrl-tz1090.c +++ b/drivers/pinctrl/pinctrl-tz1090.c @@ -1415,8 +1415,8 @@ found_mux: * the effect is the same as enabling the function on each individual pin in the * group. 
*/ -static int tz1090_pinctrl_enable(struct pinctrl_dev *pctldev, - unsigned int function, unsigned int group) +static int tz1090_pinctrl_set_mux(struct pinctrl_dev *pctldev, + unsigned int function, unsigned int group) { struct tz1090_pmx *pmx = pinctrl_dev_get_drvdata(pctldev); struct tz1090_pingroup *grp; @@ -1517,7 +1517,7 @@ static struct pinmux_ops tz1090_pinmux_ops = { .get_functions_count = tz1090_pinctrl_get_funcs_count, .get_function_name = tz1090_pinctrl_get_func_name, .get_function_groups = tz1090_pinctrl_get_func_groups, - .enable = tz1090_pinctrl_enable, + .set_mux = tz1090_pinctrl_set_mux, .gpio_request_enable = tz1090_pinctrl_gpio_request_enable, .gpio_disable_free = tz1090_pinctrl_gpio_disable_free, }; diff --git a/drivers/pinctrl/pinctrl-u300.c b/drivers/pinctrl/pinctrl-u300.c index 0959bb36450f..e9c7113d81f2 100644 --- a/drivers/pinctrl/pinctrl-u300.c +++ b/drivers/pinctrl/pinctrl-u300.c @@ -955,8 +955,8 @@ static void u300_pmx_endisable(struct u300_pmx *upmx, unsigned selector, } } -static int u300_pmx_enable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) +static int u300_pmx_set_mux(struct pinctrl_dev *pctldev, unsigned selector, + unsigned group) { struct u300_pmx *upmx; @@ -994,7 +994,7 @@ static const struct pinmux_ops u300_pmx_ops = { .get_functions_count = u300_pmx_get_funcs_count, .get_function_name = u300_pmx_get_func_name, .get_function_groups = u300_pmx_get_groups, - .enable = u300_pmx_enable, + .set_mux = u300_pmx_set_mux, }; static int u300_pin_config_get(struct pinctrl_dev *pctldev, unsigned pin, diff --git a/drivers/pinctrl/pinmux.c b/drivers/pinctrl/pinmux.c index c055daf9a80f..b874458dcb88 100644 --- a/drivers/pinctrl/pinmux.c +++ b/drivers/pinctrl/pinmux.c @@ -41,7 +41,7 @@ int pinmux_check_ops(struct pinctrl_dev *pctldev) !ops->get_functions_count || !ops->get_function_name || !ops->get_function_groups || - !ops->enable) { + !ops->set_mux) { dev_err(pctldev->dev, "pinmux ops lacks necessary functions\n"); return -EINVAL; } @@ -445,15 +445,15 @@ int pinmux_enable_setting(struct pinctrl_setting const *setting) desc->mux_setting = &(setting->data.mux); } - ret = ops->enable(pctldev, setting->data.mux.func, - setting->data.mux.group); + ret = ops->set_mux(pctldev, setting->data.mux.func, + setting->data.mux.group); if (ret) - goto err_enable; + goto err_set_mux; return 0; -err_enable: +err_set_mux: for (i = 0; i < num_pins; i++) { desc = pin_desc_get(pctldev, pins[i]); if (desc) diff --git a/drivers/pinctrl/qcom/pinctrl-msm.c b/drivers/pinctrl/qcom/pinctrl-msm.c index 80a64cad907d..01eab47a746f 100644 --- a/drivers/pinctrl/qcom/pinctrl-msm.c +++ b/drivers/pinctrl/qcom/pinctrl-msm.c @@ -134,9 +134,9 @@ static int msm_get_function_groups(struct pinctrl_dev *pctldev, return 0; } -static int msm_pinmux_enable(struct pinctrl_dev *pctldev, - unsigned function, - unsigned group) +static int msm_pinmux_set_mux(struct pinctrl_dev *pctldev, + unsigned function, + unsigned group) { struct msm_pinctrl *pctrl = pinctrl_dev_get_drvdata(pctldev); const struct msm_pingroup *g; @@ -170,7 +170,7 @@ static const struct pinmux_ops msm_pinmux_ops = { .get_functions_count = msm_get_functions_count, .get_function_name = msm_get_function_name, .get_function_groups = msm_get_function_groups, - .enable = msm_pinmux_enable, + .set_mux = msm_pinmux_set_mux, }; static int msm_config_reg(struct msm_pinctrl *pctrl, diff --git a/drivers/pinctrl/samsung/pinctrl-exynos5440.c b/drivers/pinctrl/samsung/pinctrl-exynos5440.c index 603da2f9dd95..b995ec2c5d16 100644 --- 
a/drivers/pinctrl/samsung/pinctrl-exynos5440.c +++ b/drivers/pinctrl/samsung/pinctrl-exynos5440.c @@ -364,8 +364,9 @@ static void exynos5440_pinmux_setup(struct pinctrl_dev *pctldev, unsigned select } /* enable a specified pinmux by writing to registers */ -static int exynos5440_pinmux_enable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) +static int exynos5440_pinmux_set_mux(struct pinctrl_dev *pctldev, + unsigned selector, + unsigned group) { exynos5440_pinmux_setup(pctldev, selector, group, true); return 0; @@ -387,7 +388,7 @@ static const struct pinmux_ops exynos5440_pinmux_ops = { .get_functions_count = exynos5440_get_functions_count, .get_function_name = exynos5440_pinmux_get_fname, .get_function_groups = exynos5440_pinmux_get_groups, - .enable = exynos5440_pinmux_enable, + .set_mux = exynos5440_pinmux_set_mux, .gpio_set_direction = exynos5440_pinmux_gpio_set_direction, }; diff --git a/drivers/pinctrl/samsung/pinctrl-samsung.c b/drivers/pinctrl/samsung/pinctrl-samsung.c index b07406da333c..4a47691c32b1 100644 --- a/drivers/pinctrl/samsung/pinctrl-samsung.c +++ b/drivers/pinctrl/samsung/pinctrl-samsung.c @@ -401,8 +401,9 @@ static void samsung_pinmux_setup(struct pinctrl_dev *pctldev, unsigned selector, } /* enable a specified pinmux by writing to registers */ -static int samsung_pinmux_enable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) +static int samsung_pinmux_set_mux(struct pinctrl_dev *pctldev, + unsigned selector, + unsigned group) { samsung_pinmux_setup(pctldev, selector, group, true); return 0; @@ -413,7 +414,7 @@ static const struct pinmux_ops samsung_pinmux_ops = { .get_functions_count = samsung_get_functions_count, .get_function_name = samsung_pinmux_get_fname, .get_function_groups = samsung_pinmux_get_groups, - .enable = samsung_pinmux_enable, + .set_mux = samsung_pinmux_set_mux, }; /* set or get the pin config settings for a specified pin */ diff --git a/drivers/pinctrl/sh-pfc/pinctrl.c b/drivers/pinctrl/sh-pfc/pinctrl.c index 11db3ee39d40..910deaefa0ac 100644 --- a/drivers/pinctrl/sh-pfc/pinctrl.c +++ b/drivers/pinctrl/sh-pfc/pinctrl.c @@ -312,8 +312,8 @@ static int sh_pfc_get_function_groups(struct pinctrl_dev *pctldev, return 0; } -static int sh_pfc_func_enable(struct pinctrl_dev *pctldev, unsigned selector, - unsigned group) +static int sh_pfc_func_set_mux(struct pinctrl_dev *pctldev, unsigned selector, + unsigned group) { struct sh_pfc_pinctrl *pmx = pinctrl_dev_get_drvdata(pctldev); struct sh_pfc *pfc = pmx->pfc; @@ -442,7 +442,7 @@ static const struct pinmux_ops sh_pfc_pinmux_ops = { .get_functions_count = sh_pfc_get_functions_count, .get_function_name = sh_pfc_get_function_name, .get_function_groups = sh_pfc_get_function_groups, - .enable = sh_pfc_func_enable, + .set_mux = sh_pfc_func_set_mux, .gpio_request_enable = sh_pfc_gpio_request_enable, .gpio_disable_free = sh_pfc_gpio_disable_free, .gpio_set_direction = sh_pfc_gpio_set_direction, diff --git a/drivers/pinctrl/sirf/pinctrl-sirf.c b/drivers/pinctrl/sirf/pinctrl-sirf.c index 45f93f952a39..6f252fd7cb0c 100644 --- a/drivers/pinctrl/sirf/pinctrl-sirf.c +++ b/drivers/pinctrl/sirf/pinctrl-sirf.c @@ -179,8 +179,9 @@ static void sirfsoc_pinmux_endisable(struct sirfsoc_pmx *spmx, } } -static int sirfsoc_pinmux_enable(struct pinctrl_dev *pmxdev, unsigned selector, - unsigned group) +static int sirfsoc_pinmux_set_mux(struct pinctrl_dev *pmxdev, + unsigned selector, + unsigned group) { struct sirfsoc_pmx *spmx; @@ -237,7 +238,7 @@ static int sirfsoc_pinmux_request_gpio(struct 
pinctrl_dev *pmxdev, } static struct pinmux_ops sirfsoc_pinmux_ops = { - .enable = sirfsoc_pinmux_enable, + .set_mux = sirfsoc_pinmux_set_mux, .get_functions_count = sirfsoc_pinmux_get_funcs_count, .get_function_name = sirfsoc_pinmux_get_func_name, .get_function_groups = sirfsoc_pinmux_get_groups, diff --git a/drivers/pinctrl/spear/pinctrl-spear.c b/drivers/pinctrl/spear/pinctrl-spear.c index f72cc4e192bd..abdb05ac43dc 100644 --- a/drivers/pinctrl/spear/pinctrl-spear.c +++ b/drivers/pinctrl/spear/pinctrl-spear.c @@ -268,7 +268,7 @@ static int spear_pinctrl_endisable(struct pinctrl_dev *pctldev, return 0; } -static int spear_pinctrl_enable(struct pinctrl_dev *pctldev, unsigned function, +static int spear_pinctrl_set_mux(struct pinctrl_dev *pctldev, unsigned function, unsigned group) { return spear_pinctrl_endisable(pctldev, function, group, true); @@ -338,7 +338,7 @@ static const struct pinmux_ops spear_pinmux_ops = { .get_functions_count = spear_pinctrl_get_funcs_count, .get_function_name = spear_pinctrl_get_func_name, .get_function_groups = spear_pinctrl_get_func_groups, - .enable = spear_pinctrl_enable, + .set_mux = spear_pinctrl_set_mux, .gpio_request_enable = gpio_request_enable, .gpio_disable_free = gpio_disable_free, }; diff --git a/drivers/pinctrl/sunxi/pinctrl-sunxi.c b/drivers/pinctrl/sunxi/pinctrl-sunxi.c index 3df66e366c87..ef9d804e55de 100644 --- a/drivers/pinctrl/sunxi/pinctrl-sunxi.c +++ b/drivers/pinctrl/sunxi/pinctrl-sunxi.c @@ -393,9 +393,9 @@ static void sunxi_pmx_set(struct pinctrl_dev *pctldev, spin_unlock_irqrestore(&pctl->lock, flags); } -static int sunxi_pmx_enable(struct pinctrl_dev *pctldev, - unsigned function, - unsigned group) +static int sunxi_pmx_set_mux(struct pinctrl_dev *pctldev, + unsigned function, + unsigned group) { struct sunxi_pinctrl *pctl = pinctrl_dev_get_drvdata(pctldev); struct sunxi_pinctrl_group *g = pctl->groups + group; @@ -441,7 +441,7 @@ static const struct pinmux_ops sunxi_pmx_ops = { .get_functions_count = sunxi_pmx_get_funcs_cnt, .get_function_name = sunxi_pmx_get_func_name, .get_function_groups = sunxi_pmx_get_func_groups, - .enable = sunxi_pmx_enable, + .set_mux = sunxi_pmx_set_mux, .gpio_set_direction = sunxi_pmx_gpio_set_direction, }; diff --git a/drivers/pinctrl/vt8500/pinctrl-wmt.c b/drivers/pinctrl/vt8500/pinctrl-wmt.c index 8cea355f9a81..d055d63309e4 100644 --- a/drivers/pinctrl/vt8500/pinctrl-wmt.c +++ b/drivers/pinctrl/vt8500/pinctrl-wmt.c @@ -131,9 +131,9 @@ static int wmt_set_pinmux(struct wmt_pinctrl_data *data, unsigned func, return 0; } -static int wmt_pmx_enable(struct pinctrl_dev *pctldev, - unsigned func_selector, - unsigned group_selector) +static int wmt_pmx_set_mux(struct pinctrl_dev *pctldev, + unsigned func_selector, + unsigned group_selector) { struct wmt_pinctrl_data *data = pinctrl_dev_get_drvdata(pctldev); u32 pinnum = data->pins[group_selector].number; @@ -168,7 +168,7 @@ static struct pinmux_ops wmt_pinmux_ops = { .get_functions_count = wmt_pmx_get_functions_count, .get_function_name = wmt_pmx_get_function_name, .get_function_groups = wmt_pmx_get_function_groups, - .enable = wmt_pmx_enable, + .set_mux = wmt_pmx_set_mux, .gpio_disable_free = wmt_pmx_gpio_disable_free, .gpio_set_direction = wmt_pmx_gpio_set_direction, }; diff --git a/include/linux/pinctrl/pinmux.h b/include/linux/pinctrl/pinmux.h index 3097aafbeb24..511bda9ed4bf 100644 --- a/include/linux/pinctrl/pinmux.h +++ b/include/linux/pinctrl/pinmux.h @@ -39,13 +39,12 @@ struct pinctrl_dev; * name can be used with the generic @pinctrl_ops to retrieve the 
* actual pins affected. The applicable groups will be returned in * @groups and the number of groups in @num_groups - * @enable: enable a certain muxing function with a certain pin group. The + * @set_mux: enable a certain muxing function with a certain pin group. The * driver does not need to figure out whether enabling this function * conflicts some other use of the pins in that group, such collisions * are handled by the pinmux subsystem. The @func_selector selects a * certain function whereas @group_selector selects a certain set of pins * to be used. On simple controllers the latter argument may be ignored - * @disable: disable a certain muxing selector with a certain pin group * @gpio_request_enable: requests and enables GPIO on a certain pin. * Implement this only if you can mux every pin individually as GPIO. The * affected GPIO range is passed along with an offset(pin number) into that @@ -68,8 +67,8 @@ struct pinmux_ops { unsigned selector, const char * const **groups, unsigned * const num_groups); - int (*enable) (struct pinctrl_dev *pctldev, unsigned func_selector, - unsigned group_selector); + int (*set_mux) (struct pinctrl_dev *pctldev, unsigned func_selector, + unsigned group_selector); int (*gpio_request_enable) (struct pinctrl_dev *pctldev, struct pinctrl_gpio_range *range, unsigned offset); -- cgit v1.2.3 From 3af0dbd592fe0a92002f16e341519ba03e92adf7 Mon Sep 17 00:00:00 2001 From: Sonic Zhang Date: Mon, 1 Sep 2014 11:19:52 +0800 Subject: gpio: mcp23s08 to support both device tree and platform data Device tree is not enabled on some architectures where the mcp23s08 gpio driver is still required. v2-changes: - Parse device tree properties into platform data rather than individual variables. v3-changes: - Use the of_node in the gpio_chip device structure, because struct device * always has an of_node, which is NULL when OF is not used.
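For boards without device tree, the driver can now be configured entirely through platform data. A minimal board-file sketch for an I2C-attached MCP23017 follows; the GPIO base, I2C address and pullup mask are invented for illustration, while the field names match the mcp23s08_platform_data additions in this patch:

#include <linux/i2c.h>
#include <linux/spi/mcp23s08.h>

static struct mcp23s08_platform_data board_mcp23017_pdata = {
	.base		= 160,		/* first GPIO number to assign */
	.chip[0]	= {
		.pullups	= 0x00ff,	/* ~100K pullups on IO 0-7 */
	},
	.irq_controller	= true,		/* expose the chip as an interrupt controller */
	.mirror		= true,		/* mirror the two interrupt outputs */
};

static struct i2c_board_info board_i2c_devices[] __initdata = {
	{
		I2C_BOARD_INFO("mcp23017", 0x20),
		.platform_data	= &board_mcp23017_pdata,
	},
};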
Signed-off-by: Sonic Zhang Reviewed-by: Alexandre Courbot Signed-off-by: Linus Walleij --- drivers/gpio/Kconfig | 1 - drivers/gpio/gpio-mcp23s08.c | 64 +++++++++++++++++++++++--------------------- include/linux/spi/mcp23s08.h | 18 +++++++++++++ 3 files changed, 52 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpio/Kconfig b/drivers/gpio/Kconfig index ec27ec0be8c2..ec398bee7c60 100644 --- a/drivers/gpio/Kconfig +++ b/drivers/gpio/Kconfig @@ -806,7 +806,6 @@ config GPIO_MAX7301 config GPIO_MCP23S08 tristate "Microchip MCP23xxx I/O expander" - depends on OF_GPIO depends on (SPI_MASTER && !I2C) || I2C help SPI/I2C driver for Microchip MCP23S08/MCP23S17/MCP23008/MCP23017 diff --git a/drivers/gpio/gpio-mcp23s08.c b/drivers/gpio/gpio-mcp23s08.c index 6f183d9b487e..8488e2fd307c 100644 --- a/drivers/gpio/gpio-mcp23s08.c +++ b/drivers/gpio/gpio-mcp23s08.c @@ -479,7 +479,7 @@ static int mcp23s08_irq_setup(struct mcp23s08 *mcp) mutex_init(&mcp->irq_lock); - mcp->irq_domain = irq_domain_add_linear(chip->of_node, chip->ngpio, + mcp->irq_domain = irq_domain_add_linear(chip->dev->of_node, chip->ngpio, &irq_domain_simple_ops, mcp); if (!mcp->irq_domain) return -ENODEV; @@ -581,7 +581,7 @@ done: static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, void *data, unsigned addr, unsigned type, - unsigned base, unsigned pullups) + struct mcp23s08_platform_data *pdata, int cs) { int status; bool mirror = false; @@ -635,7 +635,7 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, return -EINVAL; } - mcp->chip.base = base; + mcp->chip.base = pdata->base; mcp->chip.can_sleep = true; mcp->chip.dev = dev; mcp->chip.owner = THIS_MODULE; @@ -648,11 +648,9 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, if (status < 0) goto fail; - mcp->irq_controller = of_property_read_bool(mcp->chip.of_node, - "interrupt-controller"); + mcp->irq_controller = pdata->irq_controller; if (mcp->irq && mcp->irq_controller && (type == MCP_TYPE_017)) - mirror = of_property_read_bool(mcp->chip.of_node, - "microchip,irq-mirror"); + mirror = pdata->mirror; if ((status & IOCON_SEQOP) || !(status & IOCON_HAEN) || mirror) { /* mcp23s17 has IOCON twice, make sure they are in sync */ @@ -668,7 +666,7 @@ static int mcp23s08_probe_one(struct mcp23s08 *mcp, struct device *dev, } /* configure ~100K pullups */ - status = mcp->ops->write(mcp, MCP_GPPU, pullups); + status = mcp->ops->write(mcp, MCP_GPPU, pdata->chip[cs].pullups); if (status < 0) goto fail; @@ -768,25 +766,29 @@ MODULE_DEVICE_TABLE(of, mcp23s08_i2c_of_match); static int mcp230xx_probe(struct i2c_client *client, const struct i2c_device_id *id) { - struct mcp23s08_platform_data *pdata; + struct mcp23s08_platform_data *pdata, local_pdata; struct mcp23s08 *mcp; - int status, base, pullups; + int status; const struct of_device_id *match; match = of_match_device(of_match_ptr(mcp23s08_i2c_of_match), &client->dev); - pdata = dev_get_platdata(&client->dev); - if (match || !pdata) { - base = -1; - pullups = 0; + if (match) { + pdata = &local_pdata; + pdata->base = -1; + pdata->chip[0].pullups = 0; + pdata->irq_controller = of_property_read_bool( + client->dev.of_node, + "interrupt-controller"); + pdata->mirror = of_property_read_bool(client->dev.of_node, + "microchip,irq-mirror"); client->irq = irq_of_parse_and_map(client->dev.of_node, 0); } else { - if (!gpio_is_valid(pdata->base)) { + pdata = dev_get_platdata(&client->dev); + if (!pdata || !gpio_is_valid(pdata->base)) { dev_dbg(&client->dev, 
"invalid platform data\n"); return -EINVAL; } - base = pdata->base; - pullups = pdata->chip[0].pullups; } mcp = kzalloc(sizeof(*mcp), GFP_KERNEL); @@ -795,7 +797,7 @@ static int mcp230xx_probe(struct i2c_client *client, mcp->irq = client->irq; status = mcp23s08_probe_one(mcp, &client->dev, client, client->addr, - id->driver_data, base, pullups); + id->driver_data, pdata, 0); if (status) goto fail; @@ -863,14 +865,12 @@ static void mcp23s08_i2c_exit(void) { } static int mcp23s08_probe(struct spi_device *spi) { - struct mcp23s08_platform_data *pdata; + struct mcp23s08_platform_data *pdata, local_pdata; unsigned addr; int chips = 0; struct mcp23s08_driver_data *data; int status, type; - unsigned base = -1, - ngpio = 0, - pullups[ARRAY_SIZE(pdata->chip)]; + unsigned ngpio = 0; const struct of_device_id *match; u32 spi_present_mask = 0; @@ -893,11 +893,18 @@ static int mcp23s08_probe(struct spi_device *spi) return -ENODEV; } + pdata = &local_pdata; + pdata->base = -1; for (addr = 0; addr < ARRAY_SIZE(pdata->chip); addr++) { - pullups[addr] = 0; + pdata->chip[addr].pullups = 0; if (spi_present_mask & (1 << addr)) chips++; } + pdata->irq_controller = of_property_read_bool( + spi->dev.of_node, + "interrupt-controller"); + pdata->mirror = of_property_read_bool(spi->dev.of_node, + "microchip,irq-mirror"); } else { type = spi_get_device_id(spi)->driver_data; pdata = dev_get_platdata(&spi->dev); @@ -917,10 +924,7 @@ static int mcp23s08_probe(struct spi_device *spi) return -EINVAL; } spi_present_mask |= 1 << addr; - pullups[addr] = pdata->chip[addr].pullups; } - - base = pdata->base; } if (!chips) @@ -938,13 +942,13 @@ static int mcp23s08_probe(struct spi_device *spi) chips--; data->mcp[addr] = &data->chip[chips]; status = mcp23s08_probe_one(data->mcp[addr], &spi->dev, spi, - 0x40 | (addr << 1), type, base, - pullups[addr]); + 0x40 | (addr << 1), type, pdata, + addr); if (status < 0) goto fail; - if (base != -1) - base += (type == MCP_TYPE_S17) ? 16 : 8; + if (pdata->base != -1) + pdata->base += (type == MCP_TYPE_S17) ? 16 : 8; ngpio += (type == MCP_TYPE_S17) ? 16 : 8; } data->ngpio = ngpio; diff --git a/include/linux/spi/mcp23s08.h b/include/linux/spi/mcp23s08.h index 2d676d5aaa89..aa07d7b32568 100644 --- a/include/linux/spi/mcp23s08.h +++ b/include/linux/spi/mcp23s08.h @@ -22,4 +22,22 @@ struct mcp23s08_platform_data { * base to base+15 (or base+31 for s17 variant). */ unsigned base; + /* Marks the device as a interrupt controller. + * NOTE: The interrupt functionality is only supported for i2c + * versions of the chips. The spi chips can also do the interrupts, + * but this is not supported by the linux driver yet. + */ + bool irq_controller; + + /* Sets the mirror flag in the IOCON register. Devices + * with two interrupt outputs (these are the devices ending with 17 and + * those that have 16 IOs) have two IO banks: IO 0-7 form bank 1 and + * IO 8-15 are bank 2. These chips have two different interrupt outputs: + * One for bank 1 and another for bank 2. If irq-mirror is set, both + * interrupts are generated regardless of the bank that an input change + * occurred on. If it is not set, the interrupt are only generated for + * the bank they belong to. + * On devices with only one interrupt output this property is useless. 
+ */ + bool mirror; }; -- cgit v1.2.3 From 0d37899363b0e5486f8800231b7edd75e8b60942 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 3 Sep 2014 20:01:55 +0200 Subject: pinctrl: generic: Fix PIN_CONFIG_DRIVE_OPEN_SOURCE source/drain doc mismatch PIN_CONFIG_DRIVE_OPEN_SOURCE enables open source, not open drain. Signed-off-by: Geert Uytterhoeven Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinconf-generic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index a15f10727eb8..d578a60eff23 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -57,7 +57,7 @@ * which are then pulled up with an external resistor. Setting this * config will enable open drain mode, the argument is ignored. * @PIN_CONFIG_DRIVE_OPEN_SOURCE: the pin will be driven with open source - * (open emitter). Setting this config will enable open drain mode, the + * (open emitter). Setting this config will enable open source mode, the * argument is ignored. * @PIN_CONFIG_DRIVE_STRENGTH: the pin will sink or source at most the current * passed as argument. The argument is in mA. -- cgit v1.2.3 From 87fed556d08d21dd7dd3e0222c94c187e4c2d5e2 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Wed, 3 Sep 2014 10:35:13 +0200 Subject: bcma: get info about flash type SoC booted from MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is ongoing work on cleaning up MIPS's nvram support so it can be re-used on other platforms (bcm53xx, to be precise). This will require a bit of extra logic in bcma, which this patch implements. Signed-off-by: Rafał Miłecki Signed-off-by: John W.
Linville --- drivers/bcma/driver_mips.c | 62 ++++++++++++++++++++++++++++++++++++++++++ include/linux/bcma/bcma_regs.h | 5 ++++ 2 files changed, 67 insertions(+) (limited to 'include/linux') diff --git a/drivers/bcma/driver_mips.c b/drivers/bcma/driver_mips.c index 11115bbe115c..004d6aa671ce 100644 --- a/drivers/bcma/driver_mips.c +++ b/drivers/bcma/driver_mips.c @@ -21,6 +21,14 @@ #include #include +enum bcma_boot_dev { + BCMA_BOOT_DEV_UNK = 0, + BCMA_BOOT_DEV_ROM, + BCMA_BOOT_DEV_PARALLEL, + BCMA_BOOT_DEV_SERIAL, + BCMA_BOOT_DEV_NAND, +}; + static const char * const part_probes[] = { "bcm47xxpart", NULL }; static struct physmap_flash_data bcma_pflash_data = { @@ -229,11 +237,51 @@ u32 bcma_cpu_clock(struct bcma_drv_mips *mcore) } EXPORT_SYMBOL(bcma_cpu_clock); +static enum bcma_boot_dev bcma_boot_dev(struct bcma_bus *bus) +{ + struct bcma_drv_cc *cc = &bus->drv_cc; + u8 cc_rev = cc->core->id.rev; + + if (cc_rev == 42) { + struct bcma_device *core; + + core = bcma_find_core(bus, BCMA_CORE_NS_ROM); + if (core) { + switch (bcma_aread32(core, BCMA_IOST) & + BCMA_NS_ROM_IOST_BOOT_DEV_MASK) { + case BCMA_NS_ROM_IOST_BOOT_DEV_NOR: + return BCMA_BOOT_DEV_SERIAL; + case BCMA_NS_ROM_IOST_BOOT_DEV_NAND: + return BCMA_BOOT_DEV_NAND; + case BCMA_NS_ROM_IOST_BOOT_DEV_ROM: + default: + return BCMA_BOOT_DEV_ROM; + } + } + } else { + if (cc_rev == 38) { + if (cc->status & BCMA_CC_CHIPST_5357_NAND_BOOT) + return BCMA_BOOT_DEV_NAND; + else if (cc->status & BIT(5)) + return BCMA_BOOT_DEV_ROM; + } + + if ((cc->capabilities & BCMA_CC_CAP_FLASHT) == + BCMA_CC_FLASHT_PARA) + return BCMA_BOOT_DEV_PARALLEL; + else + return BCMA_BOOT_DEV_SERIAL; + } + + return BCMA_BOOT_DEV_SERIAL; +} + static void bcma_core_mips_flash_detect(struct bcma_drv_mips *mcore) { struct bcma_bus *bus = mcore->core->bus; struct bcma_drv_cc *cc = &bus->drv_cc; struct bcma_pflash *pflash = &cc->pflash; + enum bcma_boot_dev boot_dev; switch (cc->capabilities & BCMA_CC_CAP_FLASHT) { case BCMA_CC_FLASHT_STSER: @@ -269,6 +317,20 @@ static void bcma_core_mips_flash_detect(struct bcma_drv_mips *mcore) bcma_nflash_init(cc); } } + + /* Determine flash type this SoC boots from */ + boot_dev = bcma_boot_dev(bus); + switch (boot_dev) { + case BCMA_BOOT_DEV_PARALLEL: + case BCMA_BOOT_DEV_SERIAL: + /* TODO: Init NVRAM using BCMA_SOC_FLASH2 window */ + break; + case BCMA_BOOT_DEV_NAND: + /* TODO: Init NVRAM using BCMA_SOC_FLASH1 window */ + break; + default: + break; + } } void bcma_core_mips_early_init(struct bcma_drv_mips *mcore) diff --git a/include/linux/bcma/bcma_regs.h b/include/linux/bcma/bcma_regs.h index 917dcd7965e7..e64ae7bf80a1 100644 --- a/include/linux/bcma/bcma_regs.h +++ b/include/linux/bcma/bcma_regs.h @@ -39,6 +39,11 @@ #define BCMA_RESET_CTL_RESET 0x0001 #define BCMA_RESET_ST 0x0804 +#define BCMA_NS_ROM_IOST_BOOT_DEV_MASK 0x0003 +#define BCMA_NS_ROM_IOST_BOOT_DEV_NOR 0x0000 +#define BCMA_NS_ROM_IOST_BOOT_DEV_NAND 0x0001 +#define BCMA_NS_ROM_IOST_BOOT_DEV_ROM 0x0002 + /* BCMA PCI config space registers. */ #define BCMA_PCI_PMCSR 0x44 #define BCMA_PCI_PE 0x100 -- cgit v1.2.3 From 8d38821cbcf51292cd5a23469d03bd38932a3ba9 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 1 Aug 2014 14:15:10 +0200 Subject: resources: Add device-managed request/release_resource() Provide device-managed implementations of the request_resource() and release_resource() functions. Upon failure to request a resource, the new devm_request_resource() function will output an error message for consistent error reporting. 
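As a usage illustration, a driver probe path might reserve a fixed register window like this; the foo_* names and the address range are invented, and iomem_resource is used as the usual root for memory-mapped ranges:

static struct resource foo_regs = {
	.name	= "foo-regs",
	.start	= 0x10000000,	/* invented example range */
	.end	= 0x10000fff,
	.flags	= IORESOURCE_MEM,
};

static int foo_probe(struct platform_device *pdev)
{
	int err;

	/*
	 * Reserve the range against iomem_resource. It is released
	 * automatically when the device is unbound, and a conflict has
	 * already been logged by devm_request_resource() itself.
	 */
	err = devm_request_resource(&pdev->dev, &iomem_resource, &foo_regs);
	if (err)
		return err;

	return 0;
}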
Signed-off-by: Thierry Reding Signed-off-by: Bjorn Helgaas Acked-by: Tejun Heo --- Documentation/driver-model/devres.txt | 2 + include/linux/ioport.h | 5 +++ kernel/resource.c | 70 +++++++++++++++++++++++++++++++++++ 3 files changed, 77 insertions(+) (limited to 'include/linux') diff --git a/Documentation/driver-model/devres.txt b/Documentation/driver-model/devres.txt index d14710b04439..befc3fe12ba6 100644 --- a/Documentation/driver-model/devres.txt +++ b/Documentation/driver-model/devres.txt @@ -264,8 +264,10 @@ IIO IO region devm_release_mem_region() devm_release_region() + devm_release_resource() devm_request_mem_region() devm_request_region() + devm_request_resource() IOMAP devm_ioport_map() diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 142ec544167c..2c5250222278 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -215,6 +215,11 @@ static inline int __deprecated check_region(resource_size_t s, /* Wrappers for managed devices */ struct device; + +extern int devm_request_resource(struct device *dev, struct resource *root, + struct resource *new); +extern void devm_release_resource(struct device *dev, struct resource *new); + #define devm_request_region(dev,start,n,name) \ __devm_request_region(dev, &ioport_resource, (start), (n), (name)) #define devm_request_mem_region(dev,start,n,name) \ diff --git a/kernel/resource.c b/kernel/resource.c index da14b8d09296..ca24f19f9d18 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -1248,6 +1248,76 @@ int release_mem_region_adjustable(struct resource *parent, /* * Managed region resource */ +static void devm_resource_release(struct device *dev, void *ptr) +{ + struct resource **r = ptr; + + release_resource(*r); +} + +/** + * devm_request_resource() - request and reserve an I/O or memory resource + * @dev: device for which to request the resource + * @root: root of the resource tree from which to request the resource + * @new: descriptor of the resource to request + * + * This is a device-managed version of request_resource(). There is usually + * no need to release resources requested by this function explicitly since + * that will be taken care of when the device is unbound from its driver. + * If for some reason the resource needs to be released explicitly, because + * of ordering issues for example, drivers must call devm_release_resource() + * rather than the regular release_resource(). + * + * When a conflict is detected between any existing resources and the newly + * requested resource, an error message will be printed. + * + * Returns 0 on success or a negative error code on failure. 
+ */ +int devm_request_resource(struct device *dev, struct resource *root, + struct resource *new) +{ + struct resource *conflict, **ptr; + + ptr = devres_alloc(devm_resource_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return -ENOMEM; + + *ptr = new; + + conflict = request_resource_conflict(root, new); + if (conflict) { + dev_err(dev, "resource collision: %pR conflicts with %s %pR\n", + new, conflict->name, conflict); + devres_free(ptr); + return -EBUSY; + } + + devres_add(dev, ptr); + return 0; +} +EXPORT_SYMBOL(devm_request_resource); + +static int devm_resource_match(struct device *dev, void *res, void *data) +{ + struct resource **ptr = res; + + return *ptr == data; +} + +/** + * devm_release_resource() - release a previously requested resource + * @dev: device for which to release the resource + * @new: descriptor of the resource to release + * + * Releases a resource previously requested using devm_request_resource(). + */ +void devm_release_resource(struct device *dev, struct resource *new) +{ + WARN_ON(devres_release(dev, devm_resource_release, devm_resource_match, + new)); +} +EXPORT_SYMBOL(devm_release_resource); + struct region_devres { struct resource *parent; resource_size_t start; -- cgit v1.2.3 From efd01a72e7ec99ed583151fbf16b176cd2158967 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 5 Aug 2014 14:08:55 +0200 Subject: PCI/AER: Make <linux/aer.h> standalone includable The header file references u16 and u32 types, but they are not defined in the header nor does the header pull in the necessary includes for them. This causes build breakage when the file is included without any of the dependencies being satisfied from somewhere else. Fix this by including linux/types.h (for u16 and u32). [bhelgaas: removed pci_dev declaration (already added by 5ccb8225abf2)] Signed-off-by: Thierry Reding Signed-off-by: Bjorn Helgaas --- include/linux/aer.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/aer.h b/include/linux/aer.h index c826d1c28f9c..4fef65e57023 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -7,6 +7,8 @@ #ifndef _AER_H_ #define _AER_H_ +#include <linux/types.h> + #define AER_NONFATAL 0 #define AER_FATAL 1 #define AER_CORRECTABLE 2 -- cgit v1.2.3 From 3b5e6454aaf6b4439b19400d8365e2ec2d24e411 Mon Sep 17 00:00:00 2001 From: Gioh Kim Date: Thu, 4 Sep 2014 22:04:42 -0400 Subject: fs/buffer.c: support buffer cache allocations with gfp modifiers A buffer cache page is normally allocated from the movable area because it is referenced for a while and released soon. But some filesystems keep buffer cache pages for a long time, which can disturb page migration. New APIs are introduced to allocate the buffer cache with a caller-specified flag: the *_gfp APIs are for callers that want to pass extra page allocation flags for the page cache allocation, and the *_unmovable APIs are for callers that want the page cache allocated from the non-movable area.
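For illustration, a minimal sketch of the intended call pattern (not part of this patch; the helper and its policy flag are hypothetical):

	/* A filesystem that pins metadata buffers for a long time reads
	 * them through the _unmovable variant so the backing page cannot
	 * stall page migration; short-lived reads keep the movable
	 * default. */
	static struct buffer_head *foo_read_block(struct super_block *sb,
						  sector_t block,
						  bool long_lived)
	{
		if (long_lived)
			return sb_bread_unmovable(sb, block);	/* gfp = 0 */

		return sb_bread(sb, block);	/* adds __GFP_MOVABLE */
	}
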
Signed-off-by: Gioh Kim Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- fs/buffer.c | 45 +++++++++++++++++++++++++------------------ include/linux/buffer_head.h | 47 ++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 68 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index 8f05111bbb8b..9a6029e0dd71 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -993,7 +993,7 @@ init_page_buffers(struct page *page, struct block_device *bdev, */ static int grow_dev_page(struct block_device *bdev, sector_t block, - pgoff_t index, int size, int sizebits) + pgoff_t index, int size, int sizebits, gfp_t gfp) { struct inode *inode = bdev->bd_inode; struct page *page; @@ -1002,8 +1002,8 @@ grow_dev_page(struct block_device *bdev, sector_t block, int ret = 0; /* Will call free_more_memory() */ gfp_t gfp_mask; - gfp_mask = mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS; - gfp_mask |= __GFP_MOVABLE; + gfp_mask = (mapping_gfp_mask(inode->i_mapping) & ~__GFP_FS) | gfp; + /* * XXX: __getblk_slow() can not really deal with failure and * will endlessly loop on improvised global reclaim. Prefer @@ -1058,7 +1058,7 @@ failed: * that page was dirty, the buffers are set dirty also. */ static int -grow_buffers(struct block_device *bdev, sector_t block, int size) +grow_buffers(struct block_device *bdev, sector_t block, int size, gfp_t gfp) { pgoff_t index; int sizebits; @@ -1085,11 +1085,12 @@ grow_buffers(struct block_device *bdev, sector_t block, int size) } /* Create a page with the proper size buffers.. */ - return grow_dev_page(bdev, block, index, size, sizebits); + return grow_dev_page(bdev, block, index, size, sizebits, gfp); } -static struct buffer_head * -__getblk_slow(struct block_device *bdev, sector_t block, int size) +struct buffer_head * +__getblk_slow(struct block_device *bdev, sector_t block, + unsigned size, gfp_t gfp) { /* Size must be multiple of hard sectorsize */ if (unlikely(size & (bdev_logical_block_size(bdev)-1) || @@ -1111,13 +1112,14 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) if (bh) return bh; - ret = grow_buffers(bdev, block, size); + ret = grow_buffers(bdev, block, size, gfp); if (ret < 0) return NULL; if (ret == 0) free_more_memory(); } } +EXPORT_SYMBOL(__getblk_slow); /* * The relationship between dirty buffers and dirty pages: @@ -1371,24 +1373,25 @@ __find_get_block(struct block_device *bdev, sector_t block, unsigned size) EXPORT_SYMBOL(__find_get_block); /* - * __getblk will locate (and, if necessary, create) the buffer_head + * __getblk_gfp() will locate (and, if necessary, create) the buffer_head * which corresponds to the passed block_device, block and size. The * returned buffer has its reference count incremented. * - * __getblk() will lock up the machine if grow_dev_page's try_to_free_buffers() - * attempt is failing. FIXME, perhaps? + * __getblk_gfp() will lock up the machine if grow_dev_page's + * try_to_free_buffers() attempt is failing. FIXME, perhaps? */ struct buffer_head * -__getblk(struct block_device *bdev, sector_t block, unsigned size) +__getblk_gfp(struct block_device *bdev, sector_t block, + unsigned size, gfp_t gfp) { struct buffer_head *bh = __find_get_block(bdev, block, size); might_sleep(); if (bh == NULL) - bh = __getblk_slow(bdev, block, size); + bh = __getblk_slow(bdev, block, size, gfp); return bh; } -EXPORT_SYMBOL(__getblk); +EXPORT_SYMBOL(__getblk_gfp); /* * Do async read-ahead on a buffer.. 
@@ -1404,24 +1407,28 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size) EXPORT_SYMBOL(__breadahead); /** - * __bread() - reads a specified block and returns the bh + * __bread_gfp() - reads a specified block and returns the bh * @bdev: the block_device to read from * @block: number of block * @size: size (in bytes) to read - * + * @gfp: page allocation flag + * * Reads a specified block, and returns buffer head that contains it. + * The page cache can be allocated from non-movable area + * not to prevent page migration if you set gfp to zero. * It returns NULL if the block was unreadable. */ struct buffer_head * -__bread(struct block_device *bdev, sector_t block, unsigned size) +__bread_gfp(struct block_device *bdev, sector_t block, + unsigned size, gfp_t gfp) { - struct buffer_head *bh = __getblk(bdev, block, size); + struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp); if (likely(bh) && !buffer_uptodate(bh)) bh = __bread_slow(bh); return bh; } -EXPORT_SYMBOL(__bread); +EXPORT_SYMBOL(__bread_gfp); /* * invalidate_bh_lrus() is called rarely - but not only at unmount. diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 324329ceea1e..73b45225a7ca 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -175,12 +175,13 @@ void __wait_on_buffer(struct buffer_head *); wait_queue_head_t *bh_waitq_head(struct buffer_head *bh); struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block, unsigned size); -struct buffer_head *__getblk(struct block_device *bdev, sector_t block, - unsigned size); +struct buffer_head *__getblk_gfp(struct block_device *bdev, sector_t block, + unsigned size, gfp_t gfp); void __brelse(struct buffer_head *); void __bforget(struct buffer_head *); void __breadahead(struct block_device *, sector_t block, unsigned int size); -struct buffer_head *__bread(struct block_device *, sector_t block, unsigned size); +struct buffer_head *__bread_gfp(struct block_device *, + sector_t block, unsigned size, gfp_t gfp); void invalidate_bh_lrus(void); struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); @@ -295,7 +296,13 @@ static inline void bforget(struct buffer_head *bh) static inline struct buffer_head * sb_bread(struct super_block *sb, sector_t block) { - return __bread(sb->s_bdev, block, sb->s_blocksize); + return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE); +} + +static inline struct buffer_head * +sb_bread_unmovable(struct super_block *sb, sector_t block) +{ + return __bread_gfp(sb->s_bdev, block, sb->s_blocksize, 0); } static inline void @@ -307,7 +314,7 @@ sb_breadahead(struct super_block *sb, sector_t block) static inline struct buffer_head * sb_getblk(struct super_block *sb, sector_t block) { - return __getblk(sb->s_bdev, block, sb->s_blocksize); + return __getblk_gfp(sb->s_bdev, block, sb->s_blocksize, __GFP_MOVABLE); } static inline struct buffer_head * @@ -344,6 +351,36 @@ static inline void lock_buffer(struct buffer_head *bh) __lock_buffer(bh); } +static inline struct buffer_head *getblk_unmovable(struct block_device *bdev, + sector_t block, + unsigned size) +{ + return __getblk_gfp(bdev, block, size, 0); +} + +static inline struct buffer_head *__getblk(struct block_device *bdev, + sector_t block, + unsigned size) +{ + return __getblk_gfp(bdev, block, size, __GFP_MOVABLE); +} + +/** + * __bread() - reads a specified block and returns the bh + * @bdev: the block_device to read from + * @block: number of 
block + * @size: size (in bytes) to read + * + * Reads a specified block, and returns buffer head that contains it. + * The page cache is allocated from movable area so that it can be migrated. + * It returns NULL if the block was unreadable. + */ +static inline struct buffer_head * +__bread(struct block_device *bdev, sector_t block, unsigned size) +{ + return __bread_gfp(bdev, block, size, __GFP_MOVABLE); +} + extern int __set_page_dirty_buffers(struct page *page); #else /* CONFIG_BLOCK */ -- cgit v1.2.3 From a9fe8e29945d56f35235a3a0fba99b4cf181d211 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Tue, 2 Sep 2014 15:49:26 +0200 Subject: ipv4: implement igmp_qrv sysctl to tune igmp robustness variable As in IPv6 people might increase the igmp query robustness variable to make sure unsolicited state change reports aren't lost on the network. Add and document this new knob to igmp code. RFCs allow tuning this parameter back to first IGMP RFC, so we also use this setting for all counters, including source specific multicast. Also take over sysctl value when upping the interface and don't reuse the last one seen on the interface. Cc: Flavio Leitner Signed-off-by: Hannes Frederic Sowa Acked-by: Flavio Leitner Signed-off-by: David S. Miller --- Documentation/networking/ip-sysctl.txt | 5 +++++ include/linux/igmp.h | 1 + net/ipv4/igmp.c | 31 +++++++++++++++---------------- net/ipv4/sysctl_net_ipv4.c | 10 ++++++++++ 4 files changed, 31 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt index cfc71ac0f764..db2383cb1df9 100644 --- a/Documentation/networking/ip-sysctl.txt +++ b/Documentation/networking/ip-sysctl.txt @@ -844,6 +844,11 @@ igmp_max_memberships - INTEGER conf/all/* is special, changes the settings for all interfaces +igmp_qrv - INTEGER + Controls the IGMP query robustness variable (see RFC2236 8.1). + Default: 2 (as specified by RFC2236 8.1) + Minimum: 1 (as specified by RFC6636 4.5) + log_martians - BOOLEAN Log packets with impossible addresses to kernel log. log_martians for the interface will be enabled if at least one of diff --git a/include/linux/igmp.h b/include/linux/igmp.h index f47550d75f85..2c677afeea47 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -39,6 +39,7 @@ static inline struct igmpv3_query * extern int sysctl_igmp_max_memberships; extern int sysctl_igmp_max_msf; +extern int sysctl_igmp_qrv; struct ip_sf_socklist { unsigned int sl_max; diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 890c4258804c..4146153d875d 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -117,7 +117,7 @@ #define IGMP_V2_Unsolicited_Report_Interval (10*HZ) #define IGMP_V3_Unsolicited_Report_Interval (1*HZ) #define IGMP_Query_Response_Interval (10*HZ) -#define IGMP_Unsolicited_Report_Count 2 +#define IGMP_Query_Robustness_Variable 2 #define IGMP_Initial_Report_Delay (1) @@ -756,8 +756,7 @@ static void igmp_ifc_event(struct in_device *in_dev) { if (IGMP_V1_SEEN(in_dev) || IGMP_V2_SEEN(in_dev)) return; - in_dev->mr_ifc_count = in_dev->mr_qrv ? in_dev->mr_qrv : - IGMP_Unsolicited_Report_Count; + in_dev->mr_ifc_count = in_dev->mr_qrv ?: sysctl_igmp_qrv; igmp_ifc_start_timer(in_dev, 1); } @@ -1086,8 +1085,7 @@ static void igmpv3_add_delrec(struct in_device *in_dev, struct ip_mc_list *im) pmc->interface = im->interface; in_dev_hold(in_dev); pmc->multiaddr = im->multiaddr; - pmc->crcount = in_dev->mr_qrv ? 
in_dev->mr_qrv : - IGMP_Unsolicited_Report_Count; + pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; pmc->sfmode = im->sfmode; if (pmc->sfmode == MCAST_INCLUDE) { struct ip_sf_list *psf; @@ -1226,8 +1224,7 @@ static void igmp_group_added(struct ip_mc_list *im) } /* else, v3 */ - im->crcount = in_dev->mr_qrv ? in_dev->mr_qrv : - IGMP_Unsolicited_Report_Count; + im->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; igmp_ifc_event(in_dev); #endif } @@ -1322,7 +1319,7 @@ void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) spin_lock_init(&im->lock); #ifdef CONFIG_IP_MULTICAST setup_timer(&im->timer, igmp_timer_expire, (unsigned long)im); - im->unsolicit_count = IGMP_Unsolicited_Report_Count; + im->unsolicit_count = sysctl_igmp_qrv; #endif im->next_rcu = in_dev->mc_list; @@ -1460,7 +1457,7 @@ void ip_mc_init_dev(struct in_device *in_dev) (unsigned long)in_dev); setup_timer(&in_dev->mr_ifc_timer, igmp_ifc_timer_expire, (unsigned long)in_dev); - in_dev->mr_qrv = IGMP_Unsolicited_Report_Count; + in_dev->mr_qrv = sysctl_igmp_qrv; #endif spin_lock_init(&in_dev->mc_tomb_lock); @@ -1474,6 +1471,9 @@ void ip_mc_up(struct in_device *in_dev) ASSERT_RTNL(); +#ifdef CONFIG_IP_MULTICAST + in_dev->mr_qrv = sysctl_igmp_qrv; +#endif ip_mc_inc_group(in_dev, IGMP_ALL_HOSTS); for_each_pmc_rtnl(in_dev, pmc) @@ -1540,7 +1540,9 @@ static struct in_device *ip_mc_find_dev(struct net *net, struct ip_mreqn *imr) */ int sysctl_igmp_max_memberships __read_mostly = IP_MAX_MEMBERSHIPS; int sysctl_igmp_max_msf __read_mostly = IP_MAX_MSF; - +#ifdef CONFIG_IP_MULTICAST +int sysctl_igmp_qrv __read_mostly = IGMP_Query_Robustness_Variable; +#endif static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, __be32 *psfsrc) @@ -1575,8 +1577,7 @@ static int ip_mc_del1_src(struct ip_mc_list *pmc, int sfmode, #ifdef CONFIG_IP_MULTICAST if (psf->sf_oldin && !IGMP_V1_SEEN(in_dev) && !IGMP_V2_SEEN(in_dev)) { - psf->sf_crcount = in_dev->mr_qrv ? in_dev->mr_qrv : - IGMP_Unsolicited_Report_Count; + psf->sf_crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; psf->sf_next = pmc->tomb; pmc->tomb = psf; rv = 1; @@ -1639,8 +1640,7 @@ static int ip_mc_del_src(struct in_device *in_dev, __be32 *pmca, int sfmode, /* filter mode change */ pmc->sfmode = MCAST_INCLUDE; #ifdef CONFIG_IP_MULTICAST - pmc->crcount = in_dev->mr_qrv ? in_dev->mr_qrv : - IGMP_Unsolicited_Report_Count; + pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; in_dev->mr_ifc_count = pmc->crcount; for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; @@ -1818,8 +1818,7 @@ static int ip_mc_add_src(struct in_device *in_dev, __be32 *pmca, int sfmode, #ifdef CONFIG_IP_MULTICAST /* else no filters; keep old mode for reports */ - pmc->crcount = in_dev->mr_qrv ? 
in_dev->mr_qrv : - IGMP_Unsolicited_Report_Count; + pmc->crcount = in_dev->mr_qrv ?: sysctl_igmp_qrv; in_dev->mr_ifc_count = pmc->crcount; for (psf = pmc->sources; psf; psf = psf->sf_next) psf->sf_crcount = 0; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 79a007c52558..45d156dacd61 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -450,6 +450,16 @@ static struct ctl_table ipv4_table[] = { .mode = 0644, .proc_handler = proc_dointvec }, +#ifdef CONFIG_IP_MULTICAST + { + .procname = "igmp_qrv", + .data = &sysctl_igmp_qrv, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &one + }, +#endif { .procname = "inet_peer_threshold", .data = &inet_peer_threshold, -- cgit v1.2.3 From 6b44f519017b219a12b37173c7eef8dfce2c0100 Mon Sep 17 00:00:00 2001 From: Scot Doyle Date: Sun, 24 Aug 2014 17:12:27 +0000 Subject: sched/wait: Document timeout corner case The timeout may elapse without 0 being returned, such as when waiting on an unused queue. Document this possibility. Signed-off-by: Scot Doyle Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Link: http://lkml.kernel.org/r/alpine.LNX.2.11.1408241710070.6462@localhost.localdomain Signed-off-by: Ingo Molnar --- include/linux/wait.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index 6fb1ba5f9b2f..034f6fc7c65f 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -280,9 +280,11 @@ do { \ * wake_up() has to be called after changing any variable that could * change the result of the wait condition. * - * The function returns 0 if the @timeout elapsed, or the remaining - * jiffies (at least 1) if the @condition evaluated to %true before - * the @timeout elapsed. + * Returns: + * 0 if the @condition evaluated to %false after the @timeout elapsed, + * 1 if the @condition evaluated to %true after the @timeout elapsed, + * or the remaining jiffies (at least 1) if the @condition evaluated + * to %true before the @timeout elapsed. */ #define wait_event_timeout(wq, condition, timeout) \ ({ \ @@ -363,9 +365,11 @@ do { \ * change the result of the wait condition. * * Returns: - * 0 if the @timeout elapsed, -%ERESTARTSYS if it was interrupted by - * a signal, or the remaining jiffies (at least 1) if the @condition - * evaluated to %true before the @timeout elapsed. + * 0 if the @condition evaluated to %false after the @timeout elapsed, + * 1 if the @condition evaluated to %true after the @timeout elapsed, + * the remaining jiffies (at least 1) if the @condition evaluated + * to %true before the @timeout elapsed, or -%ERESTARTSYS if it was + * interrupted by a signal. */ #define wait_event_interruptible_timeout(wq, condition, timeout) \ ({ \ -- cgit v1.2.3 From 2740f0cf8ec8bc7ee6a58f68841759e367dda98f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 3 Sep 2014 15:24:58 +0300 Subject: cfg80211: add Intel Mobile Communications copyright Our legal structure changed at some point (see wikipedia), but we forgot to immediately switch over to the new copyright notice. For files that we have modified in the time since the change, add the proper copyright notice now. 
Signed-off-by: Johannes Berg Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + include/net/cfg80211.h | 1 + net/wireless/chan.c | 1 + net/wireless/core.c | 1 + net/wireless/nl80211.c | 1 + net/wireless/reg.c | 1 + net/wireless/scan.c | 1 + net/wireless/util.c | 1 + 8 files changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 8018c915ee63..77d4f26e0235 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -6,6 +6,7 @@ * Copyright (c) 2002-2003, Jouni Malinen * Copyright (c) 2005, Devicescape Software, Inc. * Copyright (c) 2006, Michael Wu + * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ab21299c8f4d..6be68bb31918 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -4,6 +4,7 @@ * 802.11 device and configuration interface * * Copyright 2006-2010 Johannes Berg + * Copyright 2013-2014 Intel Mobile Communications GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as diff --git a/net/wireless/chan.c b/net/wireless/chan.c index 992b34070bcb..72d81e2154d5 100644 --- a/net/wireless/chan.c +++ b/net/wireless/chan.c @@ -4,6 +4,7 @@ * any point in time. * * Copyright 2009 Johannes Berg + * Copyright 2013-2014 Intel Mobile Communications GmbH */ #include diff --git a/net/wireless/core.c b/net/wireless/core.c index a207eb116829..9698fe709251 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -2,6 +2,7 @@ * This is the linux wireless configuration interface. * * Copyright 2006-2010 Johannes Berg + * Copyright 2013-2014 Intel Mobile Communications GmbH */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 3011401f52c0..d876146498dd 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -2,6 +2,7 @@ * This is the new netlink-based wireless configuration interface. * * Copyright 2006-2010 Johannes Berg + * Copyright 2013-2014 Intel Mobile Communications GmbH */ #include diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 1afdf45db38f..8bfc9b172a49 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -3,6 +3,7 @@ * Copyright 2005-2006, Devicescape Software, Inc. * Copyright 2007 Johannes Berg * Copyright 2008-2011 Luis R. 
Rodriguez + * Copyright 2013-2014 Intel Mobile Communications GmbH + * * Permission to use, copy, modify, and/or distribute this software for any * purpose with or without fee is hereby granted, provided that the above diff --git a/net/wireless/scan.c b/net/wireless/scan.c index 620a4b40d466..bda39f149810 100644 --- a/net/wireless/scan.c +++ b/net/wireless/scan.c @@ -2,6 +2,7 @@ * cfg80211 scan result handling * * Copyright 2008 Johannes Berg + * Copyright 2013-2014 Intel Mobile Communications GmbH */ #include #include diff --git a/net/wireless/util.c b/net/wireless/util.c index 728f1c0dc70d..a8b2816ffcb9 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -2,6 +2,7 @@ * Wireless utility functions * * Copyright 2007-2009 Johannes Berg + * Copyright 2013-2014 Intel Mobile Communications GmbH */ #include #include -- cgit v1.2.3 From 7d75a871888e3f5e1a7c99bf240d1cd67d8bdfa0 Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Fri, 5 Sep 2014 13:09:25 +0200 Subject: gpio: fix 'CONFIG_GPIO_IRQCHIP' comments These two typos were introduced in commit 1425052097b5 ("gpio: add IRQ chip helpers in gpiolib"). The correct symbol name is CONFIG_GPIOLIB_IRQCHIP. [jkosina@suse.cz: add changelog] Signed-off-by: Paul Bolle Signed-off-by: Jiri Kosina --- include/linux/gpio/driver.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 573e4f3243d0..4fcd60913a42 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -105,7 +105,7 @@ struct gpio_chip { #ifdef CONFIG_GPIOLIB_IRQCHIP /* - * With CONFIG_GPIO_IRQCHIP we get an irqchip inside the gpiolib + * With CONFIG_GPIOLIB_IRQCHIP we get an irqchip inside the gpiolib * to handle IRQs for most practical cases. */ struct irq_chip *irqchip; @@ -221,7 +221,7 @@ int gpiochip_irqchip_add(struct gpio_chip *gpiochip, irq_flow_handler_t handler, unsigned int type); -#endif /* CONFIG_GPIO_IRQCHIP */ +#endif /* CONFIG_GPIOLIB_IRQCHIP */ #else /* CONFIG_GPIOLIB */ -- cgit v1.2.3 From 60a3b2253c413cf601783b070507d7dd6620c954 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 2 Sep 2014 22:53:44 +0200 Subject: net: bpf: make eBPF interpreter images read-only With eBPF getting more extended and its exposure to user space on the way, hardening the memory range the interpreter uses to steer its command flow seems appropriate. This patch moves the to-be-interpreted bytecode to read-only pages. If we were to execute a corrupted BPF interpreter image for some reason, e.g. one placed by an attacker who got past a verifier stage, it would not only provide arbitrary read/write memory access but arbitrary function calls as well. After setting up the BPF interpreter image, its contents do not change until destruction time, thus we can set up the image on pages made immutable in order to mitigate modifications to that code. The idea is derived from commit 314beb9bcabf ("x86: bpf_jit_comp: secure bpf jit against spraying attacks"). This is possible because bpf_prog is not part of sk_filter anymore. After setup, bpf_prog cannot be altered during its life-time. This prevents any modifications to the entire bpf_prog structure (incl. function/JIT image pointer). Every eBPF program (including migrated classic BPF) has to call bpf_prog_select_runtime() to select either the interpreter or a JIT image as a last setup step, and they are all freed via bpf_prog_free(), including non-JIT.
Therefore, we can easily integrate this into the eBPF life-time, plus since we directly allocate a bpf_prog, we have no performance penalty. Tested with seccomp and test_bpf testsuite in JIT/non-JIT mode and manual inspection of kernel_page_tables. Brad Spengler proposed the same idea via Twitter during development of this patch. Joint work with Hannes Frederic Sowa. Suggested-by: Brad Spengler Signed-off-by: Daniel Borkmann Signed-off-by: Hannes Frederic Sowa Cc: Alexei Starovoitov Cc: Kees Cook Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- arch/arm/net/bpf_jit_32.c | 3 +- arch/mips/net/bpf_jit.c | 3 +- arch/powerpc/net/bpf_jit_comp.c | 3 +- arch/s390/net/bpf_jit_comp.c | 2 +- arch/sparc/net/bpf_jit_comp.c | 3 +- arch/x86/net/bpf_jit_comp.c | 18 ++++------ include/linux/filter.h | 49 ++++++++++++++++++++++--- kernel/bpf/core.c | 80 +++++++++++++++++++++++++++++++++++++++-- kernel/seccomp.c | 7 ++-- lib/test_bpf.c | 2 +- net/core/filter.c | 6 ++-- 11 files changed, 144 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index a37b989a2f91..a76623bcf722 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -930,5 +930,6 @@ void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); - kfree(fp); + + bpf_prog_unlock_free(fp); } diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index 05a56619ece2..cfa83cf2447d 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -1427,5 +1427,6 @@ void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); - kfree(fp); + + bpf_prog_unlock_free(fp); } diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 3afa6f4c1957..40c53ff59124 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -697,5 +697,6 @@ void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); - kfree(fp); + + bpf_prog_unlock_free(fp); } diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 61e45b7c04d7..f2833c5b218a 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -887,5 +887,5 @@ void bpf_jit_free(struct bpf_prog *fp) module_free(NULL, header); free_filter: - kfree(fp); + bpf_prog_unlock_free(fp); } diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index 1f76c22a6a75..f7a736b645e8 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -812,5 +812,6 @@ void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); - kfree(fp); + + bpf_prog_unlock_free(fp); } diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index b08a98c59530..39ccfbb4a723 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -972,23 +972,17 @@ out: kfree(addrs); } -static void bpf_jit_free_deferred(struct work_struct *work) +void bpf_jit_free(struct bpf_prog *fp) { - struct bpf_prog *fp = container_of(work, struct bpf_prog, work); unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; struct bpf_binary_header *header = (void *)addr; + if (!fp->jited) + goto free_filter; + set_memory_rw(addr, header->pages); module_free(NULL, header); - kfree(fp); -} -void bpf_jit_free(struct bpf_prog *fp) -{ - if (fp->jited) { - INIT_WORK(&fp->work, bpf_jit_free_deferred); - schedule_work(&fp->work); - } else { - kfree(fp); - } +free_filter: + bpf_prog_unlock_free(fp); } 
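Before the header changes below, a hedged sketch of the resulting program life-cycle (illustrative only, not kernel code; error handling elided, helpers as introduced by this patch):

	static struct bpf_prog *foo_build_prog(unsigned int flen)
	{
		struct bpf_prog *fp;

		/* page-rounded, vmalloc-backed allocation replacing the
		 * former kmalloc */
		fp = bpf_prog_alloc(bpf_prog_size(flen), 0);
		if (!fp)
			return NULL;

		fp->len = flen;
		/* ... fill fp->insnsi[] with instructions ... */

		/* last setup step: probes the JIT, then makes the whole
		 * image read-only via bpf_prog_lock_ro() */
		bpf_prog_select_runtime(fp);

		/* torn down later with bpf_prog_free(), which defers the
		 * unlock and vfree/module_free to a workqueue */
		return fp;
	}
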
diff --git a/include/linux/filter.h b/include/linux/filter.h index a5227ab8ccb1..c78994593355 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -9,6 +9,11 @@ #include #include #include +#include + +struct sk_buff; +struct sock; +struct seccomp_data; /* Internally used and optimized filter representation with extended * instruction set based on top of classic BPF. @@ -320,20 +325,23 @@ struct sock_fprog_kern { struct sock_filter *filter; }; -struct sk_buff; -struct sock; -struct seccomp_data; +struct bpf_work_struct { + struct bpf_prog *prog; + struct work_struct work; +}; struct bpf_prog { + u32 pages; /* Number of allocated pages */ u32 jited:1, /* Is our filter JIT'ed? */ len:31; /* Number of filter blocks */ struct sock_fprog_kern *orig_prog; /* Original BPF program */ + struct bpf_work_struct *work; /* Deferred free work struct */ unsigned int (*bpf_func)(const struct sk_buff *skb, const struct bpf_insn *filter); + /* Instructions for interpreter */ union { struct sock_filter insns[0]; struct bpf_insn insnsi[0]; - struct work_struct work; }; }; @@ -353,6 +361,26 @@ static inline unsigned int bpf_prog_size(unsigned int proglen) #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0])) +#ifdef CONFIG_DEBUG_SET_MODULE_RONX +static inline void bpf_prog_lock_ro(struct bpf_prog *fp) +{ + set_memory_ro((unsigned long)fp, fp->pages); +} + +static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) +{ + set_memory_rw((unsigned long)fp, fp->pages); +} +#else +static inline void bpf_prog_lock_ro(struct bpf_prog *fp) +{ +} + +static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) +{ +} +#endif /* CONFIG_DEBUG_SET_MODULE_RONX */ + int sk_filter(struct sock *sk, struct sk_buff *skb); void bpf_prog_select_runtime(struct bpf_prog *fp); @@ -361,6 +389,17 @@ void bpf_prog_free(struct bpf_prog *fp); int bpf_convert_filter(struct sock_filter *prog, int len, struct bpf_insn *new_prog, int *new_len); +struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags); +struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, + gfp_t gfp_extra_flags); +void __bpf_prog_free(struct bpf_prog *fp); + +static inline void bpf_prog_unlock_free(struct bpf_prog *fp) +{ + bpf_prog_unlock_ro(fp); + __bpf_prog_free(fp); +} + int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog); void bpf_prog_destroy(struct bpf_prog *fp); @@ -450,7 +489,7 @@ static inline void bpf_jit_compile(struct bpf_prog *fp) static inline void bpf_jit_free(struct bpf_prog *fp) { - kfree(fp); + bpf_prog_unlock_free(fp); } #endif /* CONFIG_BPF_JIT */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 7f0dbcbb34af..b54bb2c2e494 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -22,6 +22,7 @@ */ #include #include +#include #include /* Registers */ @@ -63,6 +64,67 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns return NULL; } +struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) +{ + gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO | + gfp_extra_flags; + struct bpf_work_struct *ws; + struct bpf_prog *fp; + + size = round_up(size, PAGE_SIZE); + fp = __vmalloc(size, gfp_flags, PAGE_KERNEL); + if (fp == NULL) + return NULL; + + ws = kmalloc(sizeof(*ws), GFP_KERNEL | gfp_extra_flags); + if (ws == NULL) { + vfree(fp); + return NULL; + } + + fp->pages = size / PAGE_SIZE; + fp->work = ws; + + return fp; +} +EXPORT_SYMBOL_GPL(bpf_prog_alloc); + +struct bpf_prog *bpf_prog_realloc(struct bpf_prog 
*fp_old, unsigned int size, + gfp_t gfp_extra_flags) +{ + gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO | + gfp_extra_flags; + struct bpf_prog *fp; + + BUG_ON(fp_old == NULL); + + size = round_up(size, PAGE_SIZE); + if (size <= fp_old->pages * PAGE_SIZE) + return fp_old; + + fp = __vmalloc(size, gfp_flags, PAGE_KERNEL); + if (fp != NULL) { + memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE); + fp->pages = size / PAGE_SIZE; + + /* We keep fp->work from fp_old around in the new + * reallocated structure. + */ + fp_old->work = NULL; + __bpf_prog_free(fp_old); + } + + return fp; +} +EXPORT_SYMBOL_GPL(bpf_prog_realloc); + +void __bpf_prog_free(struct bpf_prog *fp) +{ + kfree(fp->work); + vfree(fp); +} +EXPORT_SYMBOL_GPL(__bpf_prog_free); + /* Base function for offset calculation. Needs to go into .text section, * therefore keeping it non-static as well; will also be used by JITs * anyway later on, so do not let the compiler omit it. @@ -523,12 +585,26 @@ void bpf_prog_select_runtime(struct bpf_prog *fp) /* Probe if internal BPF can be JITed */ bpf_int_jit_compile(fp); + /* Lock whole bpf_prog as read-only */ + bpf_prog_lock_ro(fp); } EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); -/* free internal BPF program */ +static void bpf_prog_free_deferred(struct work_struct *work) +{ + struct bpf_work_struct *ws; + + ws = container_of(work, struct bpf_work_struct, work); + bpf_jit_free(ws->prog); +} + +/* Free internal BPF program */ void bpf_prog_free(struct bpf_prog *fp) { - bpf_jit_free(fp); + struct bpf_work_struct *ws = fp->work; + + INIT_WORK(&ws->work, bpf_prog_free_deferred); + ws->prog = fp; + schedule_work(&ws->work); } EXPORT_SYMBOL_GPL(bpf_prog_free); diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 44eb005c6695..84922befea84 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -395,16 +395,15 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) if (!filter) goto free_prog; - filter->prog = kzalloc(bpf_prog_size(new_len), - GFP_KERNEL|__GFP_NOWARN); + filter->prog = bpf_prog_alloc(bpf_prog_size(new_len), __GFP_NOWARN); if (!filter->prog) goto free_filter; ret = bpf_convert_filter(fp, fprog->len, filter->prog->insnsi, &new_len); if (ret) goto free_filter_prog; - kfree(fp); + kfree(fp); atomic_set(&filter->usage, 1); filter->prog->len = new_len; @@ -413,7 +412,7 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) return filter; free_filter_prog: - kfree(filter->prog); + __bpf_prog_free(filter->prog); free_filter: kfree(filter); free_prog: diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 8c66c6aace04..9a67456ba29a 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -1836,7 +1836,7 @@ static struct bpf_prog *generate_filter(int which, int *err) break; case INTERNAL: - fp = kzalloc(bpf_prog_size(flen), GFP_KERNEL); + fp = bpf_prog_alloc(bpf_prog_size(flen), 0); if (fp == NULL) { pr_cont("UNEXPECTED_FAIL no memory left\n"); *err = -ENOMEM; diff --git a/net/core/filter.c b/net/core/filter.c index d814b8a89d0f..37f8eb06fdee 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -933,7 +933,7 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) /* Expand fp for appending the new filter representation. */ old_fp = fp; - fp = krealloc(old_fp, bpf_prog_size(new_len), GFP_KERNEL); + fp = bpf_prog_realloc(old_fp, bpf_prog_size(new_len), 0); if (!fp) { /* The old_fp is still around in case we couldn't * allocate new memory, so uncharge on that one. 
@@ -1013,7 +1013,7 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog) if (fprog->filter == NULL) return -EINVAL; - fp = kmalloc(bpf_prog_size(fprog->len), GFP_KERNEL); + fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0); if (!fp) return -ENOMEM; @@ -1069,7 +1069,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (fprog->filter == NULL) return -EINVAL; - prog = kmalloc(bpf_fsize, GFP_KERNEL); + prog = bpf_prog_alloc(bpf_fsize, 0); if (!prog) return -ENOMEM; -- cgit v1.2.3 From f0db9b073415848709dd59a6394969882f517da9 Mon Sep 17 00:00:00 2001 From: Govindarajulu Varadarajan <_govind@gmx.com> Date: Wed, 3 Sep 2014 03:17:20 +0530 Subject: ethtool: Add generic options for tunables This patch adds two new ethtool commands, ETHTOOL_GTUNABLE and ETHTOOL_STUNABLE, for getting and setting tunable values from a driver. Add get_tunable and set_tunable to ethtool_ops; a driver implements these callbacks to get/set its tunable values. Signed-off-by: Govindarajulu Varadarajan <_govind@gmx.com> Signed-off-by: David S. Miller --- include/linux/ethtool.h | 4 +++ include/uapi/linux/ethtool.h | 28 +++++++++++++++ net/core/ethtool.c | 81 ++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index e658229fee39..c1a2d60dfb82 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -257,6 +257,10 @@ struct ethtool_ops { struct ethtool_eeprom *, u8 *); int (*get_eee)(struct net_device *, struct ethtool_eee *); int (*set_eee)(struct net_device *, struct ethtool_eee *); + int (*get_tunable)(struct net_device *, + const struct ethtool_tunable *, void *); + int (*set_tunable)(struct net_device *, + const struct ethtool_tunable *, const void *); }; diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index e3c7a719c76b..7a364f2f3d3f 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -209,6 +209,32 @@ struct ethtool_value { __u32 data; }; +enum tunable_id { + ETHTOOL_ID_UNSPEC, + ETHTOOL_RX_COPYBREAK, +}; + +enum tunable_type_id { + ETHTOOL_TUNABLE_UNSPEC, + ETHTOOL_TUNABLE_U8, + ETHTOOL_TUNABLE_U16, + ETHTOOL_TUNABLE_U32, + ETHTOOL_TUNABLE_U64, + ETHTOOL_TUNABLE_STRING, + ETHTOOL_TUNABLE_S8, + ETHTOOL_TUNABLE_S16, + ETHTOOL_TUNABLE_S32, + ETHTOOL_TUNABLE_S64, +}; + +struct ethtool_tunable { + __u32 cmd; + __u32 id; + __u32 type_id; + __u32 len; + void *data[0]; +}; + /** * struct ethtool_regs - hardware register dump * @cmd: Command number = %ETHTOOL_GREGS @@ -1152,6 +1178,8 enum ethtool_sfeatures_retval_bits { #define ETHTOOL_GRSSH 0x00000046 /* Get RX flow hash configuration */ #define ETHTOOL_SRSSH 0x00000047 /* Set RX flow hash configuration */ +#define ETHTOOL_GTUNABLE 0x00000048 /* Get tunable configuration */ +#define ETHTOOL_STUNABLE 0x00000049 /* Set tunable configuration */ /* compatibility with older code */ #define SPARC_ETH_GSET ETHTOOL_GSET diff --git a/net/core/ethtool.c b/net/core/ethtool.c index 17cb912793fa..27e61b886520 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -1621,6 +1621,80 @@ static int ethtool_get_module_eeprom(struct net_device *dev, modinfo.eeprom_len); } +static int ethtool_tunable_valid(const struct ethtool_tunable *tuna) +{ + switch (tuna->id) { + case ETHTOOL_RX_COPYBREAK: + if (tuna->len != sizeof(u32) || + tuna->type_id != ETHTOOL_TUNABLE_U32) + return -EINVAL; + break; + default: + return -EINVAL; + } + + return 0; +} + +static int ethtool_get_tunable(struct
net_device *dev, void __user *useraddr) +{ + int ret; + struct ethtool_tunable tuna; + const struct ethtool_ops *ops = dev->ethtool_ops; + void *data; + + if (!ops->get_tunable) + return -EOPNOTSUPP; + if (copy_from_user(&tuna, useraddr, sizeof(tuna))) + return -EFAULT; + ret = ethtool_tunable_valid(&tuna); + if (ret) + return ret; + data = kmalloc(tuna.len, GFP_USER); + if (!data) + return -ENOMEM; + ret = ops->get_tunable(dev, &tuna, data); + if (ret) + goto out; + useraddr += sizeof(tuna); + ret = -EFAULT; + if (copy_to_user(useraddr, data, tuna.len)) + goto out; + ret = 0; + +out: + kfree(data); + return ret; +} + +static int ethtool_set_tunable(struct net_device *dev, void __user *useraddr) +{ + int ret; + struct ethtool_tunable tuna; + const struct ethtool_ops *ops = dev->ethtool_ops; + void *data; + + if (!ops->set_tunable) + return -EOPNOTSUPP; + if (copy_from_user(&tuna, useraddr, sizeof(tuna))) + return -EFAULT; + ret = ethtool_tunable_valid(&tuna); + if (ret) + return ret; + data = kmalloc(tuna.len, GFP_USER); + if (!data) + return -ENOMEM; + useraddr += sizeof(tuna); + ret = -EFAULT; + if (copy_from_user(data, useraddr, tuna.len)) + goto out; + ret = ops->set_tunable(dev, &tuna, data); + +out: + kfree(data); + return ret; +} + /* The main entry point in this file. Called from net/core/dev_ioctl.c */ int dev_ethtool(struct net *net, struct ifreq *ifr) @@ -1670,6 +1744,7 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GCHANNELS: case ETHTOOL_GET_TS_INFO: case ETHTOOL_GEEE: + case ETHTOOL_GTUNABLE: break; default: if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) @@ -1857,6 +1932,12 @@ int dev_ethtool(struct net *net, struct ifreq *ifr) case ETHTOOL_GMODULEEEPROM: rc = ethtool_get_module_eeprom(dev, useraddr); break; + case ETHTOOL_GTUNABLE: + rc = ethtool_get_tunable(dev, useraddr); + break; + case ETHTOOL_STUNABLE: + rc = ethtool_set_tunable(dev, useraddr); + break; default: rc = -EOPNOTSUPP; } -- cgit v1.2.3 From 62bccb8cdb69051b95a55ab0c489e3cab261c8ef Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Thu, 4 Sep 2014 13:31:35 -0400 Subject: net-timestamp: Make the clone operation stand-alone from phy timestamping The phy timestamping takes a different path than the regular timestamping does in that it will create a clone first so that the packets needing to be timestamped can be placed in a queue, or the context block could be used. In order to support these use cases I am pulling the core of the code out so it can be used in other drivers beyond just phy devices. In addition I have added a destructor named sock_efree which is meant to provide a simple way for dropping the reference to skb exceptions that aren't part of either the receive or send windows for the socket, and I have removed some duplication in spots where this destructor could be used in place of sock_edemux. Signed-off-by: Alexander Duyck Signed-off-by: David S. 
Miller --- drivers/net/phy/dp83640.c | 6 +++--- include/linux/skbuff.h | 2 ++ include/net/sock.h | 1 + net/core/skbuff.c | 32 +++++++++++++++++++++++++------- net/core/sock.c | 6 ++++++ net/core/timestamping.c | 14 +++----------- 6 files changed, 40 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/phy/dp83640.c b/drivers/net/phy/dp83640.c index d5991ac46ab0..87648b306551 100644 --- a/drivers/net/phy/dp83640.c +++ b/drivers/net/phy/dp83640.c @@ -1148,7 +1148,7 @@ static void dp83640_remove(struct phy_device *phydev) kfree_skb(skb); while ((skb = skb_dequeue(&dp83640->tx_queue)) != NULL) - skb_complete_tx_timestamp(skb, NULL); + kfree_skb(skb); clock = dp83640_clock_get(dp83640->clock); @@ -1405,7 +1405,7 @@ static void dp83640_txtstamp(struct phy_device *phydev, case HWTSTAMP_TX_ONESTEP_SYNC: if (is_sync(skb, type)) { - skb_complete_tx_timestamp(skb, NULL); + kfree_skb(skb); return; } /* fall through */ @@ -1416,7 +1416,7 @@ static void dp83640_txtstamp(struct phy_device *phydev, case HWTSTAMP_TX_OFF: default: - skb_complete_tx_timestamp(skb, NULL); + kfree_skb(skb); break; } } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 02529fcad1ac..1cf0cfaef10a 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2690,6 +2690,8 @@ static inline ktime_t net_invalid_timestamp(void) return ktime_set(0, 0); } +struct sk_buff *skb_clone_sk(struct sk_buff *skb); + #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING void skb_clone_tx_timestamp(struct sk_buff *skb); diff --git a/include/net/sock.h b/include/net/sock.h index 3fde6130789d..e02be37a3d91 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1574,6 +1574,7 @@ struct sk_buff *sock_wmalloc(struct sock *sk, unsigned long size, int force, void sock_wfree(struct sk_buff *skb); void skb_orphan_partial(struct sk_buff *skb); void sock_rfree(struct sk_buff *skb); +void sock_efree(struct sk_buff *skb); void sock_edemux(struct sk_buff *skb); int sock_setsockopt(struct socket *sock, int level, int op, diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 697e696c914b..a936a401564e 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3511,6 +3511,27 @@ struct sk_buff *sock_dequeue_err_skb(struct sock *sk) } EXPORT_SYMBOL(sock_dequeue_err_skb); +struct sk_buff *skb_clone_sk(struct sk_buff *skb) +{ + struct sock *sk = skb->sk; + struct sk_buff *clone; + + if (!sk || !atomic_inc_not_zero(&sk->sk_refcnt)) + return NULL; + + clone = skb_clone(skb, GFP_ATOMIC); + if (!clone) { + sock_put(sk); + return NULL; + } + + clone->sk = sk; + clone->destructor = sock_efree; + + return clone; +} +EXPORT_SYMBOL(skb_clone_sk); + static void __skb_complete_tx_timestamp(struct sk_buff *skb, struct sock *sk, int tstype) @@ -3540,14 +3561,11 @@ void skb_complete_tx_timestamp(struct sk_buff *skb, { struct sock *sk = skb->sk; - skb->sk = NULL; + /* take a reference to prevent skb_orphan() from freeing the socket */ + sock_hold(sk); - if (hwtstamps) { - *skb_hwtstamps(skb) = *hwtstamps; - __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); - } else { - kfree_skb(skb); - } + *skb_hwtstamps(skb) = *hwtstamps; + __skb_complete_tx_timestamp(skb, sk, SCM_TSTAMP_SND); sock_put(sk); } diff --git a/net/core/sock.c b/net/core/sock.c index f1a638ee93d9..d04005c51724 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1637,6 +1637,12 @@ void sock_rfree(struct sk_buff *skb) } EXPORT_SYMBOL(sock_rfree); +void sock_efree(struct sk_buff *skb) +{ + sock_put(skb->sk); +} +EXPORT_SYMBOL(sock_efree); + void sock_edemux(struct 
sk_buff *skb) { struct sock *sk = skb->sk; diff --git a/net/core/timestamping.c b/net/core/timestamping.c index f48a59fd8f39..43d3dd62fcc8 100644 --- a/net/core/timestamping.c +++ b/net/core/timestamping.c @@ -36,10 +36,9 @@ void skb_clone_tx_timestamp(struct sk_buff *skb) { struct phy_device *phydev; struct sk_buff *clone; - struct sock *sk = skb->sk; unsigned int type; - if (!sk) + if (!skb->sk) return; type = classify(skb); @@ -48,16 +47,9 @@ void skb_clone_tx_timestamp(struct sk_buff *skb) phydev = skb->dev->phydev; if (likely(phydev->drv->txtstamp)) { - if (!atomic_inc_not_zero(&sk->sk_refcnt)) + clone = skb_clone_sk(skb); + if (!clone) return; - - clone = skb_clone(skb, GFP_ATOMIC); - if (!clone) { - sock_put(sk); - return; - } - - clone->sk = sk; phydev->drv->txtstamp(phydev, clone, type); } } -- cgit v1.2.3 From 56193d1bce2b2759cb4bdcc00cd05544894a0c90 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 5 Sep 2014 19:20:26 -0400 Subject: net: Add function for parsing the header length out of linear ethernet frames This patch updates some of the flow_dissector api so that it can be used to parse the length of ethernet buffers stored in fragments. Most of the changes needed were to __skb_get_poff as it needed to be updated to support sending a linear buffer instead of a skb. I have split __skb_get_poff into two functions, the first is skb_get_poff and it retains the functionality of the original __skb_get_poff. The other function is __skb_get_poff which now works much like __skb_flow_dissect in relation to skb_flow_dissect in that it provides the same functionality but works with just a data buffer and hlen instead of needing an skb. Signed-off-by: Alexander Duyck Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 1 + include/linux/skbuff.h | 4 +++- include/net/flow_keys.h | 2 ++ net/core/filter.c | 2 +- net/core/flow_dissector.c | 46 +++++++++++++++++++++++++++++++-------------- net/ethernet/eth.c | 27 ++++++++++++++++++++++++++ 6 files changed, 66 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 9c5529dc6d07..733980fce8e3 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -29,6 +29,7 @@ #include #ifdef __KERNEL__ +u32 eth_get_headlen(void *data, unsigned int max_len); __be16 eth_type_trans(struct sk_buff *skb, struct net_device *dev); extern const struct header_ops eth_header_ops; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 1cf0cfaef10a..07c9fdd0c126 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3218,7 +3218,9 @@ bool skb_partial_csum_set(struct sk_buff *skb, u16 start, u16 off); int skb_checksum_setup(struct sk_buff *skb, bool recalculate); -u32 __skb_get_poff(const struct sk_buff *skb); +u32 skb_get_poff(const struct sk_buff *skb); +u32 __skb_get_poff(const struct sk_buff *skb, void *data, + const struct flow_keys *keys, int hlen); /** * skb_head_is_locked - Determine if the skb->head is locked down diff --git a/include/net/flow_keys.h b/include/net/flow_keys.h index 9a03f73c4974..7ee2df083542 100644 --- a/include/net/flow_keys.h +++ b/include/net/flow_keys.h @@ -40,4 +40,6 @@ static inline __be32 skb_flow_get_ports(const struct sk_buff *skb, int thoff, u8 return __skb_flow_get_ports(skb, thoff, ip_proto, NULL, 0); } u32 flow_hash_from_keys(struct flow_keys *keys); +unsigned int flow_get_hlen(const unsigned char *data, unsigned int max_len, + __be16 protocol); #endif diff 
--git a/net/core/filter.c b/net/core/filter.c index 37f8eb06fdee..fa5b7d0f77ac 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -113,7 +113,7 @@ static unsigned int pkt_type_offset(void) static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) { - return __skb_get_poff((struct sk_buff *)(unsigned long) ctx); + return skb_get_poff((struct sk_buff *)(unsigned long) ctx); } static u64 __skb_get_nlattr(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 12f48ca7a0b0..8560dea58803 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -13,6 +13,7 @@ #include #include #include +#include /* copy saddr & daddr, possibly using 64bit load/store * Equivalent to : flow->src = iph->saddr; @@ -117,6 +118,13 @@ ipv6: flow->dst = (__force __be32)ipv6_addr_hash(&iph->daddr); nhoff += sizeof(struct ipv6hdr); + /* skip the flow label processing if skb is NULL. The + * assumption here is that if there is no skb we are not + * looking for flow info as much as we are length. + */ + if (!skb) + break; + flow_label = ip6_flowlabel(iph); if (flow_label) { /* Awesome, IPv6 packet has a flow label so we can @@ -165,6 +173,9 @@ ipv6: return false; } } + case htons(ETH_P_FCOE): + flow->thoff = (u16)(nhoff + FCOE_HEADER_LEN); + /* fall through */ default: return false; } @@ -316,26 +327,18 @@ u16 __skb_tx_hash(const struct net_device *dev, struct sk_buff *skb, } EXPORT_SYMBOL(__skb_tx_hash); -/* __skb_get_poff() returns the offset to the payload as far as it could - * be dissected. The main user is currently BPF, so that we can dynamically - * truncate packets without needing to push actual payload to the user - * space and can analyze headers only, instead. - */ -u32 __skb_get_poff(const struct sk_buff *skb) +u32 __skb_get_poff(const struct sk_buff *skb, void *data, + const struct flow_keys *keys, int hlen) { - struct flow_keys keys; - u32 poff = 0; + u32 poff = keys->thoff; - if (!skb_flow_dissect(skb, &keys)) - return 0; - - poff += keys.thoff; - switch (keys.ip_proto) { + switch (keys->ip_proto) { case IPPROTO_TCP: { const struct tcphdr *tcph; struct tcphdr _tcph; - tcph = skb_header_pointer(skb, poff, sizeof(_tcph), &_tcph); + tcph = __skb_header_pointer(skb, poff, sizeof(_tcph), + data, hlen, &_tcph); if (!tcph) return poff; @@ -369,6 +372,21 @@ u32 __skb_get_poff(const struct sk_buff *skb) return poff; } +/* skb_get_poff() returns the offset to the payload as far as it could + * be dissected. The main user is currently BPF, so that we can dynamically + * truncate packets without needing to push actual payload to the user + * space and can analyze headers only, instead. + */ +u32 skb_get_poff(const struct sk_buff *skb) +{ + struct flow_keys keys; + + if (!skb_flow_dissect(skb, &keys)) + return 0; + + return __skb_get_poff(skb, skb->data, &keys, skb_headlen(skb)); +} + static inline int get_xps_queue(struct net_device *dev, struct sk_buff *skb) { #ifdef CONFIG_XPS diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 5cebca134585..33a140e15834 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -145,6 +145,33 @@ int eth_rebuild_header(struct sk_buff *skb) } EXPORT_SYMBOL(eth_rebuild_header); +/** + * eth_get_headlen - determine the length of header for an ethernet frame + * @data: pointer to start of frame + * @len: total length of frame + * + * Make a best effort attempt to pull the length for all of the headers for + * a given frame in a linear buffer.
+ */ +u32 eth_get_headlen(void *data, unsigned int len) +{ + const struct ethhdr *eth = (const struct ethhdr *)data; + struct flow_keys keys; + + /* this should never happen, but better safe than sorry */ + if (len < sizeof(*eth)) + return len; + + /* parse any remaining L2/L3 headers, check for L4 */ + if (!__skb_flow_dissect(NULL, &keys, data, + eth->h_proto, sizeof(*eth), len)) + return max_t(u32, keys.thoff, sizeof(*eth)); + + /* parse for any L4 headers */ + return min_t(u32, __skb_get_poff(NULL, data, &keys, len), len); +} +EXPORT_SYMBOL(eth_get_headlen); + /** * eth_type_trans - determine the packet's protocol ID. * @skb: received socket data -- cgit v1.2.3 From dec38b5ce6a9edb406c60c2670b26a1a4262fdb9 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Sat, 6 Sep 2014 01:11:12 +0100 Subject: regulator: isl9305: Add Intersil ISL9305/H driver The ISL9305 and ISL9305H are mini-PMICs offering two DCDC regulators and two LDO regulators. While there are some register differences between them these do not affect the current Linux driver as the relevant features are not yet supported. Signed-off-by: Mark Brown --- .../devicetree/bindings/regulator/isl9305.txt | 36 +++ drivers/regulator/Kconfig | 6 + drivers/regulator/Makefile | 1 + drivers/regulator/isl9305.c | 247 +++++++++++++++++++++ include/linux/platform_data/isl9305.h | 30 +++ 5 files changed, 320 insertions(+) create mode 100644 Documentation/devicetree/bindings/regulator/isl9305.txt create mode 100644 drivers/regulator/isl9305.c create mode 100644 include/linux/platform_data/isl9305.h (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/regulator/isl9305.txt b/Documentation/devicetree/bindings/regulator/isl9305.txt new file mode 100644 index 000000000000..a626fc1bbf0d --- /dev/null +++ b/Documentation/devicetree/bindings/regulator/isl9305.txt @@ -0,0 +1,36 @@ +Intersil ISL9305/ISL9305H voltage regulator + +Required properties: + +- compatible: "isl,isl9305" or "isl,isl9305h" +- reg: I2C slave address, usually 0x68. +- regulators: A node that houses a sub-node for each regulator within the + device. Each sub-node is identified using the node's name, with valid + values being "dcd1", "dcd2", "ldo1" and "ldo2". The content of each sub-node + is defined by the standard binding for regulators; see regulator.txt. +- VINDCD1-supply: A phandle to a regulator node supplying VINDCD1. + VINDCD2-supply: A phandle to a regulator node supplying VINDCD2. + VINLDO1-supply: A phandle to a regulator node supplying VINLDO1. + VINLDO2-supply: A phandle to a regulator node supplying VINLDO2. + +Optional properties: +- Per-regulator optional properties are defined in regulator.txt + +Example + + pmic: isl9305@68 { + compatible = "isl,isl9305"; + reg = <0x68>; + + VINDCD1-supply = <&system_power>; + VINDCD2-supply = <&system_power>; + VINLDO1-supply = <&system_power>; + VINLDO2-supply = <&system_power>; + + regulators { + dcd1 { + regulator-name = "VDD_DSP"; + regulator-always-on; + }; + }; + }; diff --git a/drivers/regulator/Kconfig b/drivers/regulator/Kconfig index e6b98ed4c12f..3c8b6f401e4f 100644 --- a/drivers/regulator/Kconfig +++ b/drivers/regulator/Kconfig @@ -250,6 +250,12 @@ config REGULATOR_HI6421 21 general purpose LDOs, 3 dedicated LDOs, and 5 BUCKs. All of them come with support to either ECO (idle) or sleep mode. +config REGULATOR_ISL9305 + tristate "Intersil ISL9305 regulator" + depends on I2C + help + This driver supports ISL9305 voltage regulator chip. 
+ config REGULATOR_ISL6271A tristate "Intersil ISL6271A Power regulator" depends on I2C diff --git a/drivers/regulator/Makefile b/drivers/regulator/Makefile index 5513e2c141b1..e12fee8c943b 100644 --- a/drivers/regulator/Makefile +++ b/drivers/regulator/Makefile @@ -34,6 +34,7 @@ obj-$(CONFIG_REGULATOR_FAN53555) += fan53555.o obj-$(CONFIG_REGULATOR_GPIO) += gpio-regulator.o obj-$(CONFIG_REGULATOR_HI6421) += hi6421-regulator.o obj-$(CONFIG_REGULATOR_ISL6271A) += isl6271a-regulator.o +obj-$(CONFIG_REGULATOR_ISL9305) += isl9305.o obj-$(CONFIG_REGULATOR_LP3971) += lp3971.o obj-$(CONFIG_REGULATOR_LP3972) += lp3972.o obj-$(CONFIG_REGULATOR_LP872X) += lp872x.o diff --git a/drivers/regulator/isl9305.c b/drivers/regulator/isl9305.c new file mode 100644 index 000000000000..b0d12d186b68 --- /dev/null +++ b/drivers/regulator/isl9305.c @@ -0,0 +1,247 @@ +/* + * isl9305 - Intersil ISL9305 DCDC regulator + * + * Copyright 2014 Linaro Ltd + * + * Author: Mark Brown + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Registers + */ +#define ISL9305_DCD1OUT 0x0 +#define ISL9305_DCD2OUT 0x1 +#define ISL9305_LDO1OUT 0x2 +#define ISL9305_LDO2OUT 0x3 +#define ISL9305_DCD_PARAMETER 0x4 +#define ISL9305_SYSTEM_PARAMETER 0x5 +#define ISL9305_DCD_SRCTL 0x6 + +#define ISL9305_MAX_REG ISL9305_DCD_SRCTL + +/* + * DCD_PARAMETER + */ +#define ISL9305_DCD_PHASE 0x40 +#define ISL9305_DCD2_ULTRA 0x20 +#define ISL9305_DCD1_ULTRA 0x10 +#define ISL9305_DCD2_BLD 0x08 +#define ISL9305_DCD1_BLD 0x04 +#define ISL9305_DCD2_MODE 0x02 +#define ISL9305_DCD1_MODE 0x01 + +/* + * SYSTEM_PARAMETER + */ +#define ISL9305_I2C_EN 0x40 +#define ISL9305_DCDPOR_MASK 0x30 +#define ISL9305_LDO2_EN 0x08 +#define ISL9305_LDO1_EN 0x04 +#define ISL9305_DCD2_EN 0x02 +#define ISL9305_DCD1_EN 0x01 + +/* + * DCD_SRCTL + */ +#define ISL9305_DCD2SR_MASK 0xc0 +#define ISL9305_DCD1SR_MASK 0x07 + +static const struct regulator_ops isl9305_ops = { + .enable = regulator_enable_regmap, + .disable = regulator_disable_regmap, + .is_enabled = regulator_is_enabled_regmap, + .list_voltage = regulator_list_voltage_linear, + .get_voltage_sel = regulator_get_voltage_sel_regmap, + .set_voltage_sel = regulator_set_voltage_sel_regmap, +}; + +static const struct regulator_desc isl9305_regulators[] = { + [ISL9305_DCD1] = { + .name = "DCD1", + .n_voltages = 0x70, + .min_uV = 825000, + .uV_step = 25000, + .vsel_reg = ISL9305_DCD1OUT, + .vsel_mask = 0x7f, + .enable_reg = ISL9305_SYSTEM_PARAMETER, + .enable_mask = ISL9305_DCD1_EN, + .supply_name = "VINDCD1", + .ops = &isl9305_ops, + }, + [ISL9305_DCD2] = { + .name = "DCD2", + .n_voltages = 0x70, + .min_uV = 825000, + .uV_step = 25000, + .vsel_reg = ISL9305_DCD2OUT, + .vsel_mask = 0x7f, + .enable_reg = ISL9305_SYSTEM_PARAMETER, + .enable_mask = ISL9305_DCD2_EN, + .supply_name = "VINDCD2", + .ops = &isl9305_ops, + }, + [ISL9305_LDO1] = { + .name = "LDO1", + .n_voltages = 0x37, + .min_uV = 900000, + .uV_step = 50000, + .vsel_reg = ISL9305_LDO1OUT, + .vsel_mask = 0x3f, + .enable_reg = ISL9305_SYSTEM_PARAMETER, + .enable_mask = ISL9305_LDO1_EN, + .supply_name = "VINLDO1", + .ops = &isl9305_ops, + }, + [ISL9305_LDO2] = { + .name = "LDO2", + .n_voltages = 0x37, + .min_uV = 900000, + .uV_step = 50000, + .vsel_reg = 
ISL9305_LDO2OUT, + .vsel_mask = 0x3f, + .enable_reg = ISL9305_SYSTEM_PARAMETER, + .enable_mask = ISL9305_LDO2_EN, + .supply_name = "VINLDO2", + .ops = &isl9305_ops, + }, +}; + +#ifdef CONFIG_OF +static struct of_regulator_match isl9305_reg_matches[] = { + [ISL9305_DCD1] = { .name = "dcd1" }, + [ISL9305_DCD2] = { .name = "dcd2" }, + [ISL9305_LDO1] = { .name = "ldo1" }, + [ISL9305_LDO2] = { .name = "ldo2" }, +}; + +static struct of_regulator_match *isl9305_parse_dt(struct i2c_client *i2c) +{ + struct device_node *node = i2c->dev.of_node; + struct of_regulator_match *matches; + struct device_node *regs; + int count; + + regs = of_get_child_by_name(node, "regulators"); + if (!regs) + return NULL; + + matches = devm_kmemdup(&i2c->dev, isl9305_reg_matches, + sizeof(isl9305_reg_matches), GFP_KERNEL); + if (!matches) + return NULL; + + count = of_regulator_match(&i2c->dev, regs, matches, + ARRAY_SIZE(isl9305_reg_matches)); + of_node_put(regs); + if ((count < 0) || (count > ARRAY_SIZE(isl9305_reg_matches))) + return NULL; + + return matches; +} +#else +static struct of_regulator_match *isl9305_parse_dt(struct i2c_client *i2c) +{ + return NULL; +} +#endif + +static const struct regmap_config isl9305_regmap = { + .reg_bits = 8, + .val_bits = 8, + + .max_register = ISL9305_MAX_REG, + .cache_type = REGCACHE_RBTREE, +}; + +static int isl9305_i2c_probe(struct i2c_client *i2c, + const struct i2c_device_id *id) +{ + struct regulator_config config = { }; + struct isl9305_pdata *pdata = i2c->dev.platform_data; + struct of_regulator_match *of_matches; + struct regulator_dev *rdev; + struct regmap *regmap; + int i, ret; + + of_matches = isl9305_parse_dt(i2c); + + regmap = devm_regmap_init_i2c(i2c, &isl9305_regmap); + if (IS_ERR(regmap)) { + ret = PTR_ERR(regmap); + dev_err(&i2c->dev, "Failed to create regmap: %d\n", ret); + return ret; + } + + config.dev = &i2c->dev; + + for (i = 0; i < ARRAY_SIZE(isl9305_regulators); i++) { + config.of_node = NULL; + config.init_data = NULL; + + if (of_matches) { + config.init_data = of_matches[i].init_data; + config.of_node = of_matches[i].of_node; + } + + if (!config.init_data && pdata) + config.init_data = pdata->init_data[i]; + + rdev = devm_regulator_register(&i2c->dev, + &isl9305_regulators[i], + &config); + if (IS_ERR(rdev)) { + ret = PTR_ERR(rdev); + dev_err(&i2c->dev, "Failed to register %s: %d\n", + isl9305_regulators[i].name, ret); + return ret; + } + } + + return 0; +} + +#ifdef CONFIG_OF +static const struct of_device_id isl9305_dt_ids[] = { + { .compatible = "isl,isl9305" }, + { .compatible = "isl,isl9305h" }, + {}, +}; +#endif + +static const struct i2c_device_id isl9305_i2c_id[] = { + { "isl9305", }, + { "isl9305h", }, + { } +}; +MODULE_DEVICE_TABLE(i2c, isl9305_i2c_id); + +static struct i2c_driver isl9305_regulator_driver = { + .driver = { + .name = "isl9305", + .owner = THIS_MODULE, + .of_match_table = of_match_ptr(isl9305_dt_ids), + }, + .probe = isl9305_i2c_probe, + .id_table = isl9305_i2c_id, +}; + +module_i2c_driver(isl9305_regulator_driver); + +MODULE_AUTHOR("Mark Brown"); +MODULE_DESCRIPTION("Intersil ISL9305 DCDC regulator"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/platform_data/isl9305.h b/include/linux/platform_data/isl9305.h new file mode 100644 index 000000000000..1419133fa69e --- /dev/null +++ b/include/linux/platform_data/isl9305.h @@ -0,0 +1,30 @@ +/* + * isl9305 - Intersil ISL9305 DCDC regulator + * + * Copyright 2014 Linaro Ltd + * + * Author: Mark Brown + * + * This program is free software; you can redistribute it and/or modify 
it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __ISL9305_H +#define __ISL9305_H + +#define ISL9305_DCD1 0 +#define ISL9305_DCD2 1 +#define ISL9305_LDO1 2 +#define ISL9305_LDO2 3 + +#define ISL9305_MAX_REGULATOR ISL9305_LDO2 + +struct regulator_init_data; + +struct isl9305_pdata { + struct regulator_init_data *init_data[ISL9305_MAX_REGULATOR + 1]; +}; + +#endif -- cgit v1.2.3 From a8adcc9012d8502e06ba7b3f966bad8f2c58edc3 Mon Sep 17 00:00:00 2001 From: Ramakrishna Pallala Date: Wed, 27 Aug 2014 23:44:08 +0530 Subject: power_supply: Add boot and calibration attributes Usually PMICs come with a coulomb counting mechanism which can be used to implement a fuel gauging solution in software itself. One of the key inputs to these SW fuel gauge solutions is the set of boot-up parameters such as boot voltage and boot current. This patch adds the VOLTAGE_BOOT and CURRENT_BOOT power supply attributes to report bootup voltage and current. This patch also adds the CALIBRATE power supply attribute, which is useful for calibrating the battery/coulomb counter. Signed-off-by: Ramakrishna Pallala Signed-off-by: Sebastian Reichel --- Documentation/power/power_supply_class.txt | 6 ++++++ drivers/power/power_supply_sysfs.c | 3 +++ include/linux/power_supply.h | 5 +++++ 3 files changed, 14 insertions(+) (limited to 'include/linux') diff --git a/Documentation/power/power_supply_class.txt b/Documentation/power/power_supply_class.txt index 48cff881cb8a..82dacc06e355 100644 --- a/Documentation/power/power_supply_class.txt +++ b/Documentation/power/power_supply_class.txt @@ -101,6 +101,10 @@ VOLTAGE_MAX, VOLTAGE_MIN - same as _DESIGN voltage values except that these ones should be used if hardware could only guess (measure and retain) the thresholds of a given power supply. +VOLTAGE_BOOT - Reports the voltage measured during boot + +CURRENT_BOOT - Reports the current measured during boot + CHARGE_FULL_DESIGN, CHARGE_EMPTY_DESIGN - design charge values, when battery considered full/empty. @@ -123,6 +127,8 @@ the current drawn from a charging source. CHARGE_TERM_CURRENT - Charge termination current used to detect the end of charge condition. +CALIBRATE - battery or coulomb counter calibration status + CONSTANT_CHARGE_VOLTAGE - constant charge voltage programmed by charger. CONSTANT_CHARGE_VOLTAGE_MAX - maximum charge voltage supported by the power supply object.
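To make the intended use of these attributes concrete, a fuel-gauge driver that latched its coulomb-counter readings at boot might report them from its get_property() callback roughly as sketched below; struct my_chip and its fields are invented for illustration and appear nowhere in this patch:

	static int my_get_property(struct power_supply *psy,
				   enum power_supply_property psp,
				   union power_supply_propval *val)
	{
		struct my_chip *chip = container_of(psy, struct my_chip, psy);

		switch (psp) {
		case POWER_SUPPLY_PROP_VOLTAGE_BOOT:
			val->intval = chip->boot_uv;	/* microvolts at boot */
			break;
		case POWER_SUPPLY_PROP_CURRENT_BOOT:
			val->intval = chip->boot_ua;	/* microamps at boot */
			break;
		case POWER_SUPPLY_PROP_CALIBRATE:
			val->intval = chip->calibrating;
			break;
		default:
			return -EINVAL;
		}
		return 0;
	}

The is_amp/is_watt helpers extended below classify the new properties alongside the existing current and voltage properties.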
diff --git a/drivers/power/power_supply_sysfs.c b/drivers/power/power_supply_sysfs.c index 750a20275664..7e4726d37211 100644 --- a/drivers/power/power_supply_sysfs.c +++ b/drivers/power/power_supply_sysfs.c @@ -149,9 +149,11 @@ static struct device_attribute power_supply_attrs[] = { POWER_SUPPLY_ATTR(voltage_now), POWER_SUPPLY_ATTR(voltage_avg), POWER_SUPPLY_ATTR(voltage_ocv), + POWER_SUPPLY_ATTR(voltage_boot), POWER_SUPPLY_ATTR(current_max), POWER_SUPPLY_ATTR(current_now), POWER_SUPPLY_ATTR(current_avg), + POWER_SUPPLY_ATTR(current_boot), POWER_SUPPLY_ATTR(power_now), POWER_SUPPLY_ATTR(power_avg), POWER_SUPPLY_ATTR(charge_full_design), @@ -193,6 +195,7 @@ static struct device_attribute power_supply_attrs[] = { POWER_SUPPLY_ATTR(type), POWER_SUPPLY_ATTR(scope), POWER_SUPPLY_ATTR(charge_term_current), + POWER_SUPPLY_ATTR(calibrate), /* Properties of type `const char *' */ POWER_SUPPLY_ATTR(model_name), POWER_SUPPLY_ATTR(manufacturer), diff --git a/include/linux/power_supply.h b/include/linux/power_supply.h index f3dea41dbcd2..de59a28b1b5b 100644 --- a/include/linux/power_supply.h +++ b/include/linux/power_supply.h @@ -102,9 +102,11 @@ enum power_supply_property { POWER_SUPPLY_PROP_VOLTAGE_NOW, POWER_SUPPLY_PROP_VOLTAGE_AVG, POWER_SUPPLY_PROP_VOLTAGE_OCV, + POWER_SUPPLY_PROP_VOLTAGE_BOOT, POWER_SUPPLY_PROP_CURRENT_MAX, POWER_SUPPLY_PROP_CURRENT_NOW, POWER_SUPPLY_PROP_CURRENT_AVG, + POWER_SUPPLY_PROP_CURRENT_BOOT, POWER_SUPPLY_PROP_POWER_NOW, POWER_SUPPLY_PROP_POWER_AVG, POWER_SUPPLY_PROP_CHARGE_FULL_DESIGN, @@ -146,6 +148,7 @@ enum power_supply_property { POWER_SUPPLY_PROP_TYPE, /* use power_supply.type instead */ POWER_SUPPLY_PROP_SCOPE, POWER_SUPPLY_PROP_CHARGE_TERM_CURRENT, + POWER_SUPPLY_PROP_CALIBRATE, /* Properties of type `const char *' */ POWER_SUPPLY_PROP_MODEL_NAME, POWER_SUPPLY_PROP_MANUFACTURER, @@ -291,6 +294,7 @@ static inline bool power_supply_is_amp_property(enum power_supply_property psp) case POWER_SUPPLY_PROP_CURRENT_MAX: case POWER_SUPPLY_PROP_CURRENT_NOW: case POWER_SUPPLY_PROP_CURRENT_AVG: + case POWER_SUPPLY_PROP_CURRENT_BOOT: return 1; default: break; @@ -315,6 +319,7 @@ static inline bool power_supply_is_watt_property(enum power_supply_property psp) case POWER_SUPPLY_PROP_VOLTAGE_NOW: case POWER_SUPPLY_PROP_VOLTAGE_AVG: case POWER_SUPPLY_PROP_VOLTAGE_OCV: + case POWER_SUPPLY_PROP_VOLTAGE_BOOT: case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE: case POWER_SUPPLY_PROP_CONSTANT_CHARGE_VOLTAGE_MAX: case POWER_SUPPLY_PROP_POWER_NOW: -- cgit v1.2.3 From 521d24ee598bd8a8b71d7ac76ce2c0da0e548406 Mon Sep 17 00:00:00 2001 From: Pranith Kumar Date: Tue, 8 Jul 2014 18:26:18 -0400 Subject: rcu: Return bool type in rcu_lockdep_current_cpu_online() Return true instead of 1 in rcu_lockdep_current_cpu_online() as this function has bool as its return type. Signed-off-by: Pranith Kumar Signed-off-by: Paul E.
McKenney --- include/linux/rcupdate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d231aa17b1d7..7e47e44bce03 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -349,7 +349,7 @@ bool rcu_lockdep_current_cpu_online(void); #else /* #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ static inline bool rcu_lockdep_current_cpu_online(void) { - return 1; + return true; } #endif /* #else #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */ -- cgit v1.2.3 From 85b39d305bfe809a11ff2770d380be3e2465beec Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 8 Jul 2014 15:17:59 -0700 Subject: rcu: Uninline rcu_read_lock_held() This commit uninlines rcu_read_lock_held(). According to "size vmlinux" this saves 28549 bytes in .text: - 5541731 3014560 14757888 23314179 + 5513182 3026848 14757888 23297918 Note: it looks as if the data grows by 12288 bytes but this is not true, it does not actually grow. But .data starts with ALIGN(THREAD_SIZE) and since .text shrinks the padding grows, and thus .data grows too as it is seen by /bin/size. diff System.map: - ffffffff81510000 D _sdata - ffffffff81510000 D init_thread_union + ffffffff81509000 D _sdata + ffffffff8150c000 D init_thread_union Perhaps we can change vmlinux.lds.S to align .data itself, so that /bin/size can't "wrongly" report that .data grows if .text shrinks. Signed-off-by: Oleg Nesterov Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 36 +----------------------------------- kernel/rcu/update.c | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 7e47e44bce03..321ed0d4e675 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -371,41 +371,7 @@ extern struct lockdep_map rcu_sched_lock_map; extern struct lockdep_map rcu_callback_map; int debug_lockdep_rcu_enabled(void); -/** - * rcu_read_lock_held() - might we be in RCU read-side critical section? - * - * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU - * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, - * this assumes we are in an RCU read-side critical section unless it can - * prove otherwise. This is useful for debug checks in functions that - * require that they be called within an RCU read-side critical section. - * - * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot - * and while lockdep is disabled. - * - * Note that rcu_read_lock() and the matching rcu_read_unlock() must - * occur in the same context, for example, it is illegal to invoke - * rcu_read_unlock() in process context if the matching rcu_read_lock() - * was invoked from within an irq handler. - * - * Note that rcu_read_lock() is disallowed if the CPU is either idle or - * offline from an RCU perspective, so check for those as well. - */ -static inline int rcu_read_lock_held(void) -{ - if (!debug_lockdep_rcu_enabled()) - return 1; - if (!rcu_is_watching()) - return 0; - if (!rcu_lockdep_current_cpu_online()) - return 0; - return lock_is_held(&rcu_lock_map); -} - -/* - * rcu_read_lock_bh_held() is defined out of line to avoid #include-file - * hell.
- */ +int rcu_read_lock_held(void); int rcu_read_lock_bh_held(void); /** diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 4056d7992a6c..ea8ea7b16e11 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -136,6 +136,38 @@ int notrace debug_lockdep_rcu_enabled(void) } EXPORT_SYMBOL_GPL(debug_lockdep_rcu_enabled); +/** + * rcu_read_lock_held() - might we be in RCU read-side critical section? + * + * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an RCU + * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, + * this assumes we are in an RCU read-side critical section unless it can + * prove otherwise. This is useful for debug checks in functions that + * require that they be called within an RCU read-side critical section. + * + * Checks debug_lockdep_rcu_enabled() to prevent false positives during boot + * and while lockdep is disabled. + * + * Note that rcu_read_lock() and the matching rcu_read_unlock() must + * occur in the same context, for example, it is illegal to invoke + * rcu_read_unlock() in process context if the matching rcu_read_lock() + * was invoked from within an irq handler. + * + * Note that rcu_read_lock() is disallowed if the CPU is either idle or + * offline from an RCU perspective, so check for those as well. + */ +int rcu_read_lock_held(void) +{ + if (!debug_lockdep_rcu_enabled()) + return 1; + if (!rcu_is_watching()) + return 0; + if (!rcu_lockdep_current_cpu_online()) + return 0; + return lock_is_held(&rcu_lock_map); +} +EXPORT_SYMBOL_GPL(rcu_read_lock_held); + /** * rcu_read_lock_bh_held() - might we be in RCU-bh read-side critical section? * -- cgit v1.2.3 From eea203fea3484598280a07fe503e025e886297fb Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 14 Jul 2014 09:16:15 -0400 Subject: rcu: Use pr_alert/pr_cont for printing logs Use pr_alert/pr_cont for printing the logs from rcutorture module directly instead of writing it to a buffer and then printing it. This saves us from having to allocate such buffers. Also remove a resulting empty function. I tested this using the parse-torture.sh script as follows: $ dmesg | grep torture > log.txt $ bash parse-torture.sh log.txt test $ There were no warnings, which means that parsing went fine. Signed-off-by: Joe Perches Signed-off-by: Pranith Kumar Signed-off-by: Paul E. McKenney --- include/linux/torture.h | 2 +- kernel/rcu/rcutorture.c | 127 +++++++++++++++++++++--------------------------- kernel/torture.c | 16 +++--- 3 files changed, 64 insertions(+), 81 deletions(-) (limited to 'include/linux') diff --git a/include/linux/torture.h b/include/linux/torture.h index 5ca58fcbaf1b..fec46f8c08eb 100644 --- a/include/linux/torture.h +++ b/include/linux/torture.h @@ -51,7 +51,7 @@ /* Definitions for online/offline exerciser. */ int torture_onoff_init(long ooholdoff, long oointerval); -char *torture_onoff_stats(char *page); +void torture_onoff_stats(void); bool torture_onoff_failures(void); /* Low-rider random number generator.
*/ diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 7e67711cbae8..ff4f0c756dee 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -242,7 +242,7 @@ struct rcu_torture_ops { void (*call)(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); void (*cb_barrier)(void); void (*fqs)(void); - void (*stats)(char *page); + void (*stats)(void); int irq_capable; int can_boost; const char *name; @@ -525,21 +525,21 @@ static void srcu_torture_barrier(void) srcu_barrier(&srcu_ctl); } -static void srcu_torture_stats(char *page) +static void srcu_torture_stats(void) { int cpu; int idx = srcu_ctl.completed & 0x1; - page += sprintf(page, "%s%s per-CPU(idx=%d):", - torture_type, TORTURE_FLAG, idx); + pr_alert("%s%s per-CPU(idx=%d):", + torture_type, TORTURE_FLAG, idx); for_each_possible_cpu(cpu) { long c0, c1; c0 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[!idx]; c1 = (long)per_cpu_ptr(srcu_ctl.per_cpu_ref, cpu)->c[idx]; - page += sprintf(page, " %d(%ld,%ld)", cpu, c0, c1); + pr_cont(" %d(%ld,%ld)", cpu, c0, c1); } - sprintf(page, "\n"); + pr_cont("\n"); } static void srcu_torture_synchronize_expedited(void) @@ -1031,10 +1031,15 @@ rcu_torture_reader(void *arg) } /* - * Create an RCU-torture statistics message in the specified buffer. + * Print torture statistics. Caller must ensure that there is only + * one call to this function at a given time!!! This is normally + * accomplished by relying on the module system to only have one copy + * of the module loaded, and then by giving the rcu_torture_stats + * kthread full control (or the init/cleanup functions when rcu_torture_stats + * thread is not running). */ static void -rcu_torture_printk(char *page) +rcu_torture_stats_print(void) { int cpu; int i; @@ -1052,55 +1057,60 @@ rcu_torture_printk(char *page) if (pipesummary[i] != 0) break; } - page += sprintf(page, "%s%s ", torture_type, TORTURE_FLAG); - page += sprintf(page, - "rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ", - rcu_torture_current, - rcu_torture_current_version, - list_empty(&rcu_torture_freelist), - atomic_read(&n_rcu_torture_alloc), - atomic_read(&n_rcu_torture_alloc_fail), - atomic_read(&n_rcu_torture_free)); - page += sprintf(page, "rtmbe: %d rtbke: %ld rtbre: %ld ", - atomic_read(&n_rcu_torture_mberror), - n_rcu_torture_boost_ktrerror, - n_rcu_torture_boost_rterror); - page += sprintf(page, "rtbf: %ld rtb: %ld nt: %ld ", - n_rcu_torture_boost_failure, - n_rcu_torture_boosts, - n_rcu_torture_timers); - page = torture_onoff_stats(page); - page += sprintf(page, "barrier: %ld/%ld:%ld", - n_barrier_successes, - n_barrier_attempts, - n_rcu_torture_barrier_error); - page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG); + + pr_alert("%s%s ", torture_type, TORTURE_FLAG); + pr_cont("rtc: %p ver: %lu tfle: %d rta: %d rtaf: %d rtf: %d ", + rcu_torture_current, + rcu_torture_current_version, + list_empty(&rcu_torture_freelist), + atomic_read(&n_rcu_torture_alloc), + atomic_read(&n_rcu_torture_alloc_fail), + atomic_read(&n_rcu_torture_free)); + pr_cont("rtmbe: %d rtbke: %ld rtbre: %ld ", + atomic_read(&n_rcu_torture_mberror), + n_rcu_torture_boost_ktrerror, + n_rcu_torture_boost_rterror); + pr_cont("rtbf: %ld rtb: %ld nt: %ld ", + n_rcu_torture_boost_failure, + n_rcu_torture_boosts, + n_rcu_torture_timers); + torture_onoff_stats(); + pr_cont("barrier: %ld/%ld:%ld\n", + n_barrier_successes, + n_barrier_attempts, + n_rcu_torture_barrier_error); + + pr_alert("%s%s ", torture_type, TORTURE_FLAG); if (atomic_read(&n_rcu_torture_mberror) != 0 || 
n_rcu_torture_barrier_error != 0 || n_rcu_torture_boost_ktrerror != 0 || n_rcu_torture_boost_rterror != 0 || n_rcu_torture_boost_failure != 0 || i > 1) { - page += sprintf(page, "!!! "); + pr_cont("%s", "!!! "); atomic_inc(&n_rcu_torture_error); WARN_ON_ONCE(1); } - page += sprintf(page, "Reader Pipe: "); + pr_cont("Reader Pipe: "); for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) - page += sprintf(page, " %ld", pipesummary[i]); - page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG); - page += sprintf(page, "Reader Batch: "); + pr_cont(" %ld", pipesummary[i]); + pr_cont("\n"); + + pr_alert("%s%s ", torture_type, TORTURE_FLAG); + pr_cont("Reader Batch: "); for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) - page += sprintf(page, " %ld", batchsummary[i]); - page += sprintf(page, "\n%s%s ", torture_type, TORTURE_FLAG); - page += sprintf(page, "Free-Block Circulation: "); + pr_cont(" %ld", batchsummary[i]); + pr_cont("\n"); + + pr_alert("%s%s ", torture_type, TORTURE_FLAG); + pr_cont("Free-Block Circulation: "); for (i = 0; i < RCU_TORTURE_PIPE_LEN + 1; i++) { - page += sprintf(page, " %d", - atomic_read(&rcu_torture_wcount[i])); + pr_cont(" %d", atomic_read(&rcu_torture_wcount[i])); } - page += sprintf(page, "\n"); + pr_cont("\n"); + if (cur_ops->stats) - cur_ops->stats(page); + cur_ops->stats(); if (rtcv_snap == rcu_torture_current_version && rcu_torture_current != NULL) { int __maybe_unused flags; @@ -1109,40 +1119,15 @@ rcu_torture_printk(char *page) rcutorture_get_gp_data(cur_ops->ttype, &flags, &gpnum, &completed); - page += sprintf(page, - "??? Writer stall state %d g%lu c%lu f%#x\n", - rcu_torture_writer_state, - gpnum, completed, flags); + pr_alert("??? Writer stall state %d g%lu c%lu f%#x\n", + rcu_torture_writer_state, + gpnum, completed, flags); show_rcu_gp_kthreads(); rcutorture_trace_dump(); } rtcv_snap = rcu_torture_current_version; } -/* - * Print torture statistics. Caller must ensure that there is only - * one call to this function at a given time!!! This is normally - * accomplished by relying on the module system to only have one copy - * of the module loaded, and then by giving the rcu_torture_stats - * kthread full control (or the init/cleanup functions when rcu_torture_stats - * thread is not running). - */ -static void -rcu_torture_stats_print(void) -{ - int size = nr_cpu_ids * 200 + 8192; - char *buf; - - buf = kmalloc(size, GFP_KERNEL); - if (!buf) { - pr_err("rcu-torture: Out of memory, need: %d", size); - return; - } - rcu_torture_printk(buf); - pr_alert("%s", buf); - kfree(buf); -} - /* * Periodically prints torture statistics, if periodic statistics printing * was specified via the stat_interval module parameter. diff --git a/kernel/torture.c b/kernel/torture.c index d600af21f022..ede8b25ec1ae 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -211,18 +211,16 @@ EXPORT_SYMBOL_GPL(torture_onoff_cleanup); /* * Print online/offline testing statistics. 
*/ -char *torture_onoff_stats(char *page) +void torture_onoff_stats(void) { #ifdef CONFIG_HOTPLUG_CPU - page += sprintf(page, - "onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ", - n_online_successes, n_online_attempts, - n_offline_successes, n_offline_attempts, - min_online, max_online, - min_offline, max_offline, - sum_online, sum_offline, HZ); + pr_cont("onoff: %ld/%ld:%ld/%ld %d,%d:%d,%d %lu:%lu (HZ=%d) ", + n_online_successes, n_online_attempts, + n_offline_successes, n_offline_attempts, + min_online, max_online, + min_offline, max_offline, + sum_online, sum_offline, HZ); #endif /* #ifdef CONFIG_HOTPLUG_CPU */ - return page; } EXPORT_SYMBOL_GPL(torture_onoff_stats); -- cgit v1.2.3 From 8315f42295d2667a7f942f154b73a86fd7cb2227 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 27 Jun 2014 13:42:20 -0700 Subject: rcu: Add call_rcu_tasks() This commit adds a new RCU-tasks flavor of RCU, which provides call_rcu_tasks(). This RCU flavor's quiescent states are voluntary context switch (not preemption!) and userspace execution (not the idle loop -- use some sort of schedule_on_each_cpu() if you need to handle the idle tasks). Note that unlike other RCU flavors, these quiescent states occur in tasks, not necessarily CPUs. Includes fixes from Steven Rostedt. This RCU flavor is assumed to have very infrequent latency-tolerant updaters. This assumption permits significant simplifications, including a single global callback list protected by a single global lock, along with a single task-private linked list containing all tasks that have not yet passed through a quiescent state. If experience shows this assumption to be incorrect, the required additional complexity will be added. Suggested-by: Steven Rostedt Signed-off-by: Paul E. McKenney --- include/linux/init_task.h | 9 +++ include/linux/rcupdate.h | 36 ++++++++++ include/linux/sched.h | 23 ++++--- init/Kconfig | 10 +++ kernel/rcu/tiny.c | 2 + kernel/rcu/tree.c | 2 + kernel/rcu/update.c | 171 ++++++++++++++++++++++++++++++++++++++++++++++ 7 files changed, 242 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 2bb4c4f3531a..dffd9258ee60 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -117,6 +117,14 @@ extern struct group_info init_groups; #else #define INIT_TASK_RCU_PREEMPT(tsk) #endif +#ifdef CONFIG_TASKS_RCU +#define INIT_TASK_RCU_TASKS(tsk) \ + .rcu_tasks_holdout = false, \ + .rcu_tasks_holdout_list = \ + LIST_HEAD_INIT(tsk.rcu_tasks_holdout_list), +#else +#define INIT_TASK_RCU_TASKS(tsk) +#endif extern struct cred init_cred; @@ -224,6 +232,7 @@ extern struct task_group root_task_group; INIT_FTRACE_GRAPH \ INIT_TRACE_RECURSION \ INIT_TASK_RCU_PREEMPT(tsk) \ + INIT_TASK_RCU_TASKS(tsk) \ INIT_CPUSET_SEQ(tsk) \ INIT_RT_MUTEXES(tsk) \ INIT_VTIME(tsk) \ diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d231aa17b1d7..3432063f4c87 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -197,6 +197,26 @@ void call_rcu_sched(struct rcu_head *head, void synchronize_sched(void); +/** + * call_rcu_tasks() - Queue an RCU callback for invocation after a task-based grace period + * @head: structure to be used for queueing the RCU updates. + * @func: actual callback function to be invoked after the grace period + * + * The callback function will be invoked some time after a full grace + * period elapses, in other words after all currently executing RCU + * read-side critical sections have completed.
call_rcu_tasks() assumes + * that the read-side critical sections end at a voluntary context + * switch (not a preemption!), entry into idle, or transition to usermode + * execution. As such, there are no read-side primitives analogous to + * rcu_read_lock() and rcu_read_unlock() because this primitive is intended + * to determine that all tasks have passed through a safe state, not so + * much for data-structure synchronization. + * + * See the description of call_rcu() for more detailed information on + * memory ordering guarantees. + */ +void call_rcu_tasks(struct rcu_head *head, void (*func)(struct rcu_head *head)); + #ifdef CONFIG_PREEMPT_RCU void __rcu_read_lock(void); @@ -294,6 +314,22 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev, rcu_irq_exit(); \ } while (0) +/* + * Note a voluntary context switch for RCU-tasks benefit. This is a + * macro rather than an inline function to avoid #include hell. + */ +#ifdef CONFIG_TASKS_RCU +#define rcu_note_voluntary_context_switch(t) \ + do { \ + preempt_disable(); /* Exclude synchronize_sched(); */ \ + if (ACCESS_ONCE((t)->rcu_tasks_holdout)) \ + ACCESS_ONCE((t)->rcu_tasks_holdout) = false; \ + preempt_enable(); \ + } while (0) +#else /* #ifdef CONFIG_TASKS_RCU */ +#define rcu_note_voluntary_context_switch(t) do { } while (0) +#endif /* #else #ifdef CONFIG_TASKS_RCU */ + #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) bool __rcu_is_watching(void); #endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 5c2c885ee52b..eaacac4ae77d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1270,6 +1270,11 @@ struct task_struct { #ifdef CONFIG_TREE_PREEMPT_RCU struct rcu_node *rcu_blocked_node; #endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ +#ifdef CONFIG_TASKS_RCU + unsigned long rcu_tasks_nvcsw; + bool rcu_tasks_holdout; + struct list_head rcu_tasks_holdout_list; +#endif /* #ifdef CONFIG_TASKS_RCU */ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) struct sched_info sched_info; @@ -2000,28 +2005,24 @@ extern void task_clear_jobctl_pending(struct task_struct *task, unsigned int mask); #ifdef CONFIG_PREEMPT_RCU - #define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ #define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ +#endif /* #ifdef CONFIG_PREEMPT_RCU */ static inline void rcu_copy_process(struct task_struct *p) { +#ifdef CONFIG_PREEMPT_RCU p->rcu_read_lock_nesting = 0; p->rcu_read_unlock_special = 0; -#ifdef CONFIG_TREE_PREEMPT_RCU p->rcu_blocked_node = NULL; -#endif /* #ifdef CONFIG_TREE_PREEMPT_RCU */ INIT_LIST_HEAD(&p->rcu_node_entry); +#endif /* #ifdef CONFIG_PREEMPT_RCU */ +#ifdef CONFIG_TASKS_RCU + p->rcu_tasks_holdout = false; + INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); +#endif /* #ifdef CONFIG_TASKS_RCU */ } -#else - -static inline void rcu_copy_process(struct task_struct *p) -{ -} - -#endif - static inline void tsk_restore_flags(struct task_struct *task, unsigned long orig_flags, unsigned long flags) { diff --git a/init/Kconfig b/init/Kconfig index e84c6423a2e5..c4539c4e177f 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -507,6 +507,16 @@ config PREEMPT_RCU This option enables preemptible-RCU code that is common between TREE_PREEMPT_RCU and, in the old days, TINY_PREEMPT_RCU.
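To make the intended usage concrete before the Kconfig text below: the expected caller is code such as a tracing trampoline manager, which must not free a trampoline while some preempted task could still be executing in it. A hypothetical user of the new primitive (all names here are invented, not part of this series) would look like:

	#include <linux/rcupdate.h>
	#include <linux/slab.h>

	struct my_trampoline {
		struct rcu_head rh;
		/* executable stub and bookkeeping would live here */
	};

	static void my_trampoline_free(struct rcu_head *rhp)
	{
		kfree(container_of(rhp, struct my_trampoline, rh));
	}

	static void my_trampoline_retire(struct my_trampoline *tp)
	{
		/*
		 * Unlink tp first so no new tasks can begin executing it;
		 * call_rcu_tasks() then defers the free until every task
		 * has passed through a voluntary context switch, usermode
		 * execution, or the idle loop.
		 */
		call_rcu_tasks(&tp->rh, my_trampoline_free);
	}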
+config TASKS_RCU + bool "Task-based RCU implementation using voluntary context switch" + default n + help + This option enables a task-based RCU implementation that uses + only voluntary context switch (not preemption!), idle, and + user-mode execution as quiescent states. + + If unsure, say N. + config RCU_STALL_COMMON def_bool ( TREE_RCU || TREE_PREEMPT_RCU || RCU_TRACE ) help diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index d9efcc13008c..717f00854fc0 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -254,6 +254,8 @@ void rcu_check_callbacks(int cpu, int user) rcu_sched_qs(cpu); else if (!in_softirq()) rcu_bh_qs(cpu); + if (user) + rcu_note_voluntary_context_switch(current); } /* diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 1b70cb6fbe3c..8ad91d1e317d 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2410,6 +2410,8 @@ void rcu_check_callbacks(int cpu, int user) rcu_preempt_check_callbacks(cpu); if (rcu_pending(cpu)) invoke_rcu_core(); + if (user) + rcu_note_voluntary_context_switch(current); trace_rcu_utilization(TPS("End scheduler-tick")); } diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 4056d7992a6c..19b3dacb0753 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -47,6 +47,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS @@ -347,3 +348,173 @@ static int __init check_cpu_stall_init(void) early_initcall(check_cpu_stall_init); #endif /* #ifdef CONFIG_RCU_STALL_COMMON */ + +#ifdef CONFIG_TASKS_RCU + +/* + * Simple variant of RCU whose quiescent states are voluntary context switch, + * user-space execution, and idle. As such, grace periods can take one good + * long time. There are no read-side primitives similar to rcu_read_lock() + * and rcu_read_unlock() because this implementation is intended to get + * the system into a safe state for some of the manipulations involved in + * tracing and the like. Finally, this implementation does not support + * high call_rcu_tasks() rates from multiple CPUs. If this is required, + * per-CPU callback lists will be needed. + */ + +/* Global list of callbacks and associated lock. */ +static struct rcu_head *rcu_tasks_cbs_head; +static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head; +static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock); + +/* Post an RCU-tasks callback. */ +void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp)) +{ + unsigned long flags; + + rhp->next = NULL; + rhp->func = func; + raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags); + *rcu_tasks_cbs_tail = rhp; + rcu_tasks_cbs_tail = &rhp->next; + raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags); +} +EXPORT_SYMBOL_GPL(call_rcu_tasks); + +/* See if the current task has stopped holding out, remove from list if so. */ +static void check_holdout_task(struct task_struct *t) +{ + if (!ACCESS_ONCE(t->rcu_tasks_holdout) || + t->rcu_tasks_nvcsw != ACCESS_ONCE(t->nvcsw) || + !ACCESS_ONCE(t->on_rq)) { + ACCESS_ONCE(t->rcu_tasks_holdout) = false; + list_del_rcu(&t->rcu_tasks_holdout_list); + put_task_struct(t); + } +} + +/* RCU-tasks kthread that detects grace periods and invokes callbacks. */ +static int __noreturn rcu_tasks_kthread(void *arg) +{ + unsigned long flags; + struct task_struct *g, *t; + struct rcu_head *list; + struct rcu_head *next; + LIST_HEAD(rcu_tasks_holdouts); + + /* FIXME: Add housekeeping affinity.
*/ + + /* + * Each pass through the following loop makes one check for + * newly arrived callbacks, and, if there are some, waits for + * one RCU-tasks grace period and then invokes the callbacks. + * This loop is terminated by the system going down. ;-) + */ + for (;;) { + + /* Pick up any new callbacks. */ + raw_spin_lock_irqsave(&rcu_tasks_cbs_lock, flags); + list = rcu_tasks_cbs_head; + rcu_tasks_cbs_head = NULL; + rcu_tasks_cbs_tail = &rcu_tasks_cbs_head; + raw_spin_unlock_irqrestore(&rcu_tasks_cbs_lock, flags); + + /* If there were none, wait a bit and start over. */ + if (!list) { + schedule_timeout_interruptible(HZ); + WARN_ON(signal_pending(current)); + continue; + } + + /* + * Wait for all pre-existing t->on_rq and t->nvcsw + * transitions to complete. Invoking synchronize_sched() + * suffices because all these transitions occur with + * interrupts disabled. Without this synchronize_sched(), + * a read-side critical section that started before the + * grace period might be incorrectly seen as having started + * after the grace period. + * + * This synchronize_sched() also dispenses with the + * need for a memory barrier on the first store to + * ->rcu_tasks_holdout, as it forces the store to happen + * after the beginning of the grace period. + */ + synchronize_sched(); + + /* + * There were callbacks, so we need to wait for an + * RCU-tasks grace period. Start off by scanning + * the task list for tasks that are not already + * voluntarily blocked. Mark these tasks and make + * a list of them in rcu_tasks_holdouts. + */ + rcu_read_lock(); + for_each_process_thread(g, t) { + if (t != current && ACCESS_ONCE(t->on_rq) && + !is_idle_task(t)) { + get_task_struct(t); + t->rcu_tasks_nvcsw = ACCESS_ONCE(t->nvcsw); + ACCESS_ONCE(t->rcu_tasks_holdout) = true; + list_add(&t->rcu_tasks_holdout_list, + &rcu_tasks_holdouts); + } + } + rcu_read_unlock(); + + /* + * Each pass through the following loop scans the list + * of holdout tasks, removing any that are no longer + * holdouts. When the list is empty, we are done. + */ + while (!list_empty(&rcu_tasks_holdouts)) { + schedule_timeout_interruptible(HZ); + WARN_ON(signal_pending(current)); + rcu_read_lock(); + list_for_each_entry_rcu(t, &rcu_tasks_holdouts, + rcu_tasks_holdout_list) + check_holdout_task(t); + rcu_read_unlock(); + } + + /* + * Because ->on_rq and ->nvcsw are not guaranteed + * to have full memory barriers prior to them in the + * schedule() path, memory reordering on other CPUs could + * cause their RCU-tasks read-side critical sections to + * extend past the end of the grace period. However, + * because these ->nvcsw updates are carried out with + * interrupts disabled, we can use synchronize_sched() + * to force the needed ordering on all such CPUs. + * + * This synchronize_sched() also confines all + * ->rcu_tasks_holdout accesses to be within the grace + * period, avoiding the need for memory barriers for + * ->rcu_tasks_holdout accesses. + */ + synchronize_sched(); + + /* Invoke the callbacks. */ + while (list) { + next = list->next; + local_bh_disable(); + list->func(list); + local_bh_enable(); + list = next; + cond_resched(); + } + } +} + +/* Spawn rcu_tasks_kthread() at boot time.
*/ +static int __init rcu_spawn_tasks_kthread(void) +{ + struct task_struct __maybe_unused *t; + + t = kthread_run(rcu_tasks_kthread, NULL, "rcu_tasks_kthread"); + BUG_ON(IS_ERR(t)); + return 0; +} +early_initcall(rcu_spawn_tasks_kthread); + +#endif /* #ifdef CONFIG_TASKS_RCU */ -- cgit v1.2.3 From bde6c3aa993066acb0d6ce32ecabe03b9d5df92d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 1 Jul 2014 11:26:57 -0700 Subject: rcu: Provide cond_resched_rcu_qs() to force quiescent states in long loops RCU-tasks requires the occasional voluntary context switch from CPU-bound in-kernel tasks. In some cases, this requires instrumenting cond_resched(). However, there is some reluctance to countenance unconditionally instrumenting cond_resched() (see http://lwn.net/Articles/603252/), so this commit creates a separate cond_resched_rcu_qs() that may be used in place of cond_resched() in locations prone to long-duration in-kernel looping. This commit currently instruments only RCU-tasks. Future possibilities include also instrumenting RCU, RCU-bh, and RCU-sched in order to reduce IPI usage. Signed-off-by: Paul E. McKenney --- fs/file.c | 2 +- include/linux/rcupdate.h | 13 +++++++++++++ kernel/rcu/rcutorture.c | 4 ++-- kernel/rcu/tree.c | 12 ++++++------ kernel/rcu/tree_plugin.h | 2 +- mm/mlock.c | 2 +- 6 files changed, 24 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/fs/file.c b/fs/file.c index 66923fe3176e..1cafc4c9275b 100644 --- a/fs/file.c +++ b/fs/file.c @@ -367,7 +367,7 @@ static struct fdtable *close_files(struct files_struct * files) struct file * file = xchg(&fdt->fd[i], NULL); if (file) { filp_close(file, files); - cond_resched(); + cond_resched_rcu_qs(); } } i++; diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 3432063f4c87..473350462d04 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -330,6 +330,19 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev, #define rcu_note_voluntary_context_switch(t) do { } while (0) #endif /* #else #ifdef CONFIG_TASKS_RCU */ +/** + * cond_resched_rcu_qs - Report potential quiescent states to RCU + * + * This macro resembles cond_resched(), except that it is defined to + * report potential quiescent states to RCU-tasks even if the cond_resched() + * machinery were to be shut off, as some advocate for PREEMPT kernels. 
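+ *
+ * As an invented example of the intended use (not from this patch), a
+ * CPU-bound in-kernel loop simply substitutes this for cond_resched():
+ *
+ *	for (i = 0; i < nr_items; i++) {
+ *		process_item(&items[i]);
+ *		cond_resched_rcu_qs();
+ *	}
+ *
+ * where process_item() stands in for arbitrary long-running work.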
+ */ +#define cond_resched_rcu_qs() \ +do { \ + rcu_note_voluntary_context_switch(current); \ + cond_resched(); \ +} while (0) + #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) bool __rcu_is_watching(void); #endif /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) || defined(CONFIG_SMP) */ diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 948a7693748e..178716713e11 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -667,7 +667,7 @@ static int rcu_torture_boost(void *arg) } call_rcu_time = jiffies; } - cond_resched(); + cond_resched_rcu_qs(); stutter_wait("rcu_torture_boost"); if (torture_must_stop()) goto checkwait; @@ -1019,7 +1019,7 @@ rcu_torture_reader(void *arg) __this_cpu_inc(rcu_torture_batch[completed]); preempt_enable(); cur_ops->readunlock(idx); - cond_resched(); + cond_resched_rcu_qs(); stutter_wait("rcu_torture_reader"); } while (!torture_must_stop()); if (irqreader && cur_ops->irq_capable) { diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index 8ad91d1e317d..e23dad0661e2 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -1647,7 +1647,7 @@ static int rcu_gp_init(struct rcu_state *rsp) rnp->level, rnp->grplo, rnp->grphi, rnp->qsmask); raw_spin_unlock_irq(&rnp->lock); - cond_resched(); + cond_resched_rcu_qs(); } mutex_unlock(&rsp->onoff_mutex); @@ -1736,7 +1736,7 @@ static void rcu_gp_cleanup(struct rcu_state *rsp) /* smp_mb() provided by prior unlock-lock pair. */ nocb += rcu_future_gp_cleanup(rsp, rnp); raw_spin_unlock_irq(&rnp->lock); - cond_resched(); + cond_resched_rcu_qs(); } rnp = rcu_get_root(rsp); raw_spin_lock_irq(&rnp->lock); @@ -1785,7 +1785,7 @@ static int __noreturn rcu_gp_kthread(void *arg) /* Locking provides needed memory barrier. */ if (rcu_gp_init(rsp)) break; - cond_resched(); + cond_resched_rcu_qs(); flush_signals(current); trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), @@ -1828,10 +1828,10 @@ static int __noreturn rcu_gp_kthread(void *arg) trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), TPS("fqsend")); - cond_resched(); + cond_resched_rcu_qs(); } else { /* Deal with stray signal. */ - cond_resched(); + cond_resched_rcu_qs(); flush_signals(current); trace_rcu_grace_period(rsp->name, ACCESS_ONCE(rsp->gpnum), @@ -2434,7 +2434,7 @@ static void force_qs_rnp(struct rcu_state *rsp, struct rcu_node *rnp; rcu_for_each_leaf_node(rsp, rnp) { - cond_resched(); + cond_resched_rcu_qs(); mask = 0; raw_spin_lock_irqsave(&rnp->lock, flags); smp_mb__after_unlock_lock(); diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index a7997e272564..7672586d3920 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -1848,7 +1848,7 @@ static int rcu_oom_notify(struct notifier_block *self, get_online_cpus(); for_each_online_cpu(cpu) { smp_call_function_single(cpu, rcu_oom_notify_cpu, NULL, 1); - cond_resched(); + cond_resched_rcu_qs(); } put_online_cpus(); diff --git a/mm/mlock.c b/mm/mlock.c index ce84cb0b83ef..ab3150c26711 100644 --- a/mm/mlock.c +++ b/mm/mlock.c @@ -789,7 +789,7 @@ static int do_mlockall(int flags) /* Ignore errors */ mlock_fixup(vma, &prev, vma->vm_start, vma->vm_end, newflags); - cond_resched(); + cond_resched_rcu_qs(); } out: return 0; -- cgit v1.2.3 From 53c6d4edf874d3cbc031a53738c6cba9277faea5 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Tue, 1 Jul 2014 12:22:23 -0700 Subject: rcu: Add synchronous grace-period waiting for RCU-tasks It turns out to be easier to add the synchronous grace-period waiting functions to RCU-tasks than to work around their absense in rcutorture, so this commit adds them. The key point is that the existence of call_rcu_tasks() means that rcutorture needs an rcu_barrier_tasks(). Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 2 ++ kernel/rcu/update.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 473350462d04..640152fedcde 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -216,6 +216,8 @@ void synchronize_sched(void); * memory ordering guarantees. */ void call_rcu_tasks(struct rcu_head *head, void (*func)(struct rcu_head *head)); +void synchronize_rcu_tasks(void); +void rcu_barrier_tasks(void); #ifdef CONFIG_PREEMPT_RCU diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 19b3dacb0753..5fd1ddbfcc55 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -381,6 +381,61 @@ void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp)) } EXPORT_SYMBOL_GPL(call_rcu_tasks); +/** + * synchronize_rcu_tasks - wait until an rcu-tasks grace period has elapsed. + * + * Control will return to the caller some time after a full rcu-tasks + * grace period has elapsed, in other words after all currently + * executing rcu-tasks read-side critical sections have elapsed. These + * read-side critical sections are delimited by calls to schedule(), + * cond_resched_rcu_qs(), idle execution, userspace execution, calls + * to synchronize_rcu_tasks(), and (in theory, anyway) cond_resched(). + * + * This is a very specialized primitive, intended only for a few uses in + * tracing and other situations requiring manipulation of function + * preambles and profiling hooks. The synchronize_rcu_tasks() function + * is not (yet) intended for heavy use from multiple CPUs. + * + * Note that this guarantee implies further memory-ordering guarantees. + * On systems with more than one CPU, when synchronize_rcu_tasks() returns, + * each CPU is guaranteed to have executed a full memory barrier since the + * end of its last RCU-tasks read-side critical section whose beginning + * preceded the call to synchronize_rcu_tasks(). In addition, each CPU + * having an RCU-tasks read-side critical section that extends beyond + * the return from synchronize_rcu_tasks() is guaranteed to have executed + * a full memory barrier after the beginning of synchronize_rcu_tasks() + * and before the beginning of that RCU-tasks read-side critical section. + * Note that these guarantees include CPUs that are offline, idle, or + * executing in user mode, as well as CPUs that are executing in the kernel. + * + * Furthermore, if CPU A invoked synchronize_rcu_tasks(), which returned + * to its caller on CPU B, then both CPU A and CPU B are guaranteed + * to have executed a full memory barrier during the execution of + * synchronize_rcu_tasks() -- even if CPU A and CPU B are the same CPU + * (but again only if the system has more than one CPU). + */ +void synchronize_rcu_tasks(void) +{ + /* Complain if the scheduler has not started. */ + rcu_lockdep_assert(!rcu_scheduler_active, + "synchronize_rcu_tasks called too soon"); + + /* Wait for the grace period. 
*/ + wait_rcu_gp(call_rcu_tasks); +} + +/** + * rcu_barrier_tasks - Wait for in-flight call_rcu_tasks() callbacks. + * + * Although the current implementation is guaranteed to wait, it is not + * obligated to, for example, if there are no pending callbacks. + */ +void rcu_barrier_tasks(void) +{ + /* There is only one callback queue, so this is easy. ;-) */ + synchronize_rcu_tasks(); +} + /* See if the current task has stopped holding out, remove from list if so. */ static void check_holdout_task(struct task_struct *t) { -- cgit v1.2.3 From 3f95aa81d265223fdb13ea2b59883766a05adbdf Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Aug 2014 06:10:23 -0700 Subject: rcu: Make TASKS_RCU handle tasks that are almost done exiting Once a task has passed exit_notify() in the do_exit() code path, it is no longer on the task lists, and is therefore no longer visible to rcu_tasks_kthread(). This means that an almost-exited task might be preempted while within a trampoline, and this task won't be waited on by rcu_tasks_kthread(). This commit fixes this bug by adding an srcu_struct. An exiting task does srcu_read_lock() just before calling exit_notify(), and does the corresponding srcu_read_unlock() after doing the final preempt_disable(). This means that rcu_tasks_kthread() can do synchronize_srcu() to wait for all mostly-exited tasks to reach their final preempt_disable() region, and then use synchronize_sched() to wait for those tasks to finish exiting. Reported-by: Oleg Nesterov Suggested-by: Lai Jiangshan Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 3 +++ kernel/exit.c | 3 +++ kernel/rcu/update.c | 21 +++++++++++++++++++++ 3 files changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 640152fedcde..54b2ebb20313 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -321,6 +321,8 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev, * macro rather than an inline function to avoid #include hell. 
*/ #ifdef CONFIG_TASKS_RCU +#define TASKS_RCU(x) x +extern struct srcu_struct tasks_rcu_exit_srcu; #define rcu_note_voluntary_context_switch(t) \ do { \ preempt_disable(); /* Exclude synchronize_sched(); */ \ @@ -329,6 +331,7 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev, preempt_enable(); \ } while (0) #else /* #ifdef CONFIG_TASKS_RCU */ +#define TASKS_RCU(x) do { } while (0) #define rcu_note_voluntary_context_switch(t) do { } while (0) #endif /* #else #ifdef CONFIG_TASKS_RCU */ diff --git a/kernel/exit.c b/kernel/exit.c index 32c58f7433a3..d13f2eec4bb8 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -667,6 +667,7 @@ void do_exit(long code) { struct task_struct *tsk = current; int group_dead; + TASKS_RCU(int tasks_rcu_i); profile_task_exit(tsk); @@ -775,6 +776,7 @@ void do_exit(long code) */ flush_ptrace_hw_breakpoint(tsk); + TASKS_RCU(tasks_rcu_i = __srcu_read_lock(&tasks_rcu_exit_srcu)); exit_notify(tsk, group_dead); proc_exit_connector(tsk); #ifdef CONFIG_NUMA @@ -814,6 +816,7 @@ void do_exit(long code) if (tsk->nr_dirtied) __this_cpu_add(dirty_throttle_leaks, tsk->nr_dirtied); exit_rcu(); + TASKS_RCU(__srcu_read_unlock(&tasks_rcu_exit_srcu, tasks_rcu_i)); /* * The setting of TASK_RUNNING by try_to_wake_up() may be delayed diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 5fd1ddbfcc55..403fc4ae539e 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -367,6 +367,13 @@ static struct rcu_head *rcu_tasks_cbs_head; static struct rcu_head **rcu_tasks_cbs_tail = &rcu_tasks_cbs_head; static DEFINE_RAW_SPINLOCK(rcu_tasks_cbs_lock); +/* Track exiting tasks in order to allow them to be waited for. */ +DEFINE_SRCU(tasks_rcu_exit_srcu); + +/* Control stall timeouts. Disable with <= 0, otherwise jiffies till stall. */ +static int rcu_task_stall_timeout __read_mostly = HZ * 60 * 3; +module_param(rcu_task_stall_timeout, int, 0644); + /* Post an RCU-tasks callback. */ void call_rcu_tasks(struct rcu_head *rhp, void (*func)(struct rcu_head *rhp)) { @@ -517,6 +524,15 @@ static int __noreturn rcu_tasks_kthread(void *arg) } rcu_read_unlock(); + /* + * Wait for tasks that are in the process of exiting. + * This does only part of the job, ensuring that all + * tasks that were previously exiting reach the point + * where they have disabled preemption, allowing the + * later synchronize_sched() to finish the job. + */ + synchronize_srcu(&tasks_rcu_exit_srcu); + /* * Each pass through the following loop scans the list * of holdout tasks, removing any that are no longer @@ -546,6 +562,11 @@ static int __noreturn rcu_tasks_kthread(void *arg) * ->rcu_tasks_holdout accesses to be within the grace * period, avoiding the need for memory barriers for * ->rcu_tasks_holdout accesses. + * + * In addition, this synchronize_sched() waits for exiting + * tasks to complete their final preempt_disable() region + * of execution, cleaning up after the synchronize_srcu() + * above. */ synchronize_sched(); -- cgit v1.2.3 From 69c604557ce34015629b325b85ff1a4996038a3b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 1 Jul 2014 11:59:36 -0700 Subject: rcutorture: Add torture tests for RCU-tasks This commit adds torture tests for RCU-tasks. It also fixes a bug that would segfault for an RCU flavor lacking a callback-barrier function. Signed-off-by: Paul E. 
McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 1 + kernel/rcu/rcutorture.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 50 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 54b2ebb20313..a3123f53a4ce 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -55,6 +55,7 @@ enum rcutorture_type { RCU_FLAVOR, RCU_BH_FLAVOR, RCU_SCHED_FLAVOR, + RCU_TASKS_FLAVOR, SRCU_FLAVOR, INVALID_RCU_FLAVOR }; diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 178716713e11..75b1abf78c48 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -601,6 +601,52 @@ static struct rcu_torture_ops sched_ops = { .name = "sched" }; +#ifdef CONFIG_TASKS_RCU + +/* + * Definitions for RCU-tasks torture testing. + */ + +static int tasks_torture_read_lock(void) +{ + return 0; +} + +static void tasks_torture_read_unlock(int idx) +{ +} + +static void rcu_tasks_torture_deferred_free(struct rcu_torture *p) +{ + call_rcu_tasks(&p->rtort_rcu, rcu_torture_cb); +} + +static struct rcu_torture_ops tasks_ops = { + .ttype = RCU_TASKS_FLAVOR, + .init = rcu_sync_torture_init, + .readlock = tasks_torture_read_lock, + .read_delay = rcu_read_delay, /* just reuse rcu's version. */ + .readunlock = tasks_torture_read_unlock, + .completed = rcu_no_completed, + .deferred_free = rcu_tasks_torture_deferred_free, + .sync = synchronize_rcu_tasks, + .exp_sync = synchronize_rcu_tasks, + .call = call_rcu_tasks, + .cb_barrier = rcu_barrier_tasks, + .fqs = NULL, + .stats = NULL, + .irq_capable = 1, + .name = "tasks" +}; + +#define RCUTORTURE_TASKS_OPS &tasks_ops, + +#else /* #ifdef CONFIG_TASKS_RCU */ + +#define RCUTORTURE_TASKS_OPS + +#endif /* #else #ifdef CONFIG_TASKS_RCU */ + /* * RCU torture priority-boost testing. Runs one real-time thread per * CPU for moderate bursts, repeatedly registering RCU callbacks and @@ -1295,7 +1341,8 @@ static int rcu_torture_barrier_cbs(void *arg) if (atomic_dec_and_test(&barrier_cbs_count)) wake_up(&barrier_wq); } while (!torture_must_stop()); - cur_ops->cb_barrier(); + if (cur_ops->cb_barrier != NULL) + cur_ops->cb_barrier(); destroy_rcu_head_on_stack(&rcu); torture_kthread_stopping("rcu_torture_barrier_cbs"); return 0; @@ -1534,6 +1581,7 @@ rcu_torture_init(void) int firsterr = 0; static struct rcu_torture_ops *torture_ops[] = { &rcu_ops, &rcu_bh_ops, &rcu_busted_ops, &srcu_ops, &sched_ops, + RCUTORTURE_TASKS_OPS }; if (!torture_init_begin(torture_type, verbose, &rcutorture_runnable)) -- cgit v1.2.3 From 176f8f7a52cc6d09d686f0d900abda6942a52fbb Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 4 Aug 2014 17:43:50 -0700 Subject: rcu: Make TASKS_RCU handle nohz_full= CPUs Currently TASKS_RCU would ignore a CPU running a task in nohz_full= usermode execution. There would be neither a context switch nor a scheduling-clock interrupt to tell TASKS_RCU that the task in question had passed through a quiescent state. The grace period would therefore extend indefinitely. This commit therefore makes RCU's dyntick-idle subsystem record the task_struct structure of the task that is running in dyntick-idle mode on each CPU. The TASKS_RCU grace period can then access this information and record a quiescent state on behalf of any CPU running in dyntick-idle usermode. Signed-off-by: Paul E. 
McKenney --- include/linux/init_task.h | 3 ++- include/linux/sched.h | 2 ++ kernel/rcu/tree.c | 2 ++ kernel/rcu/tree.h | 2 ++ kernel/rcu/tree_plugin.h | 16 ++++++++++++++++ kernel/rcu/update.c | 4 +++- 6 files changed, 27 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index dffd9258ee60..03b274873b06 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -121,7 +121,8 @@ extern struct group_info init_groups; #define INIT_TASK_RCU_TASKS(tsk) \ .rcu_tasks_holdout = false, \ .rcu_tasks_holdout_list = \ - LIST_HEAD_INIT(tsk.rcu_tasks_holdout_list), + LIST_HEAD_INIT(tsk.rcu_tasks_holdout_list), \ + .rcu_tasks_idle_cpu = -1, #else #define INIT_TASK_RCU_TASKS(tsk) #endif diff --git a/include/linux/sched.h b/include/linux/sched.h index eaacac4ae77d..ec8b34722bcc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1274,6 +1274,7 @@ struct task_struct { unsigned long rcu_tasks_nvcsw; bool rcu_tasks_holdout; struct list_head rcu_tasks_holdout_list; + int rcu_tasks_idle_cpu; #endif /* #ifdef CONFIG_TASKS_RCU */ #if defined(CONFIG_SCHEDSTATS) || defined(CONFIG_TASK_DELAY_ACCT) @@ -2020,6 +2021,7 @@ static inline void rcu_copy_process(struct task_struct *p) #ifdef CONFIG_TASKS_RCU p->rcu_tasks_holdout = false; INIT_LIST_HEAD(&p->rcu_tasks_holdout_list); + p->rcu_tasks_idle_cpu = -1; #endif /* #ifdef CONFIG_TASKS_RCU */ } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index e23dad0661e2..c880f5387b1f 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -526,6 +526,7 @@ static void rcu_eqs_enter_common(struct rcu_dynticks *rdtp, long long oldval, atomic_inc(&rdtp->dynticks); smp_mb__after_atomic(); /* Force ordering with next sojourn. */ WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1); + rcu_dynticks_task_enter(); /* * It is illegal to enter an extended quiescent state while @@ -642,6 +643,7 @@ void rcu_irq_exit(void) static void rcu_eqs_exit_common(struct rcu_dynticks *rdtp, long long oldval, int user) { + rcu_dynticks_task_exit(); smp_mb__before_atomic(); /* Force ordering w/previous sojourn. */ atomic_inc(&rdtp->dynticks); /* CPUs seeing atomic_inc() must see later RCU read-side crit sects */ diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 6a86eb7bac45..3a92000c354f 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -605,6 +605,8 @@ static void rcu_sysidle_report_gp(struct rcu_state *rsp, int isidle, static void rcu_bind_gp_kthread(void); static void rcu_sysidle_init_percpu_data(struct rcu_dynticks *rdtp); static bool rcu_nohz_full_cpu(struct rcu_state *rsp); +static void rcu_dynticks_task_enter(void); +static void rcu_dynticks_task_exit(void); #endif /* #ifndef RCU_TREE_NONCORE */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 7672586d3920..e466b40052a7 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -3036,3 +3036,19 @@ static void rcu_bind_gp_kthread(void) housekeeping_affine(current); #endif /* #else #ifdef CONFIG_NO_HZ_FULL_SYSIDLE */ } + +/* Record the current task on dyntick-idle entry. */ +static void rcu_dynticks_task_enter(void) +{ +#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) + ACCESS_ONCE(current->rcu_tasks_idle_cpu) = smp_processor_id(); +#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */ +} + +/* Record no current task on dyntick-idle exit. 
*/ +static void rcu_dynticks_task_exit(void) +{ +#if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) + ACCESS_ONCE(current->rcu_tasks_idle_cpu) = -1; +#endif /* #if defined(CONFIG_TASKS_RCU) && defined(CONFIG_NO_HZ_FULL) */ +} diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index e1d71741958f..2658de4a5975 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -463,7 +463,9 @@ static void check_holdout_task(struct task_struct *t, { if (!ACCESS_ONCE(t->rcu_tasks_holdout) || t->rcu_tasks_nvcsw != ACCESS_ONCE(t->nvcsw) || - !ACCESS_ONCE(t->on_rq)) { + !ACCESS_ONCE(t->on_rq) || + (IS_ENABLED(CONFIG_NO_HZ_FULL) && + !is_idle_task(t) && t->rcu_tasks_idle_cpu >= 0)) { ACCESS_ONCE(t->rcu_tasks_holdout) = false; list_del_rcu(&t->rcu_tasks_holdout_list); put_task_struct(t); -- cgit v1.2.3 From 01a81330344b09028881c953a51d1106a9e63518 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 5 Aug 2014 05:23:35 -0700 Subject: rcu: Remove redundant preempt_disable() from rcu_note_voluntary_context_switch() In theory, synchronize_sched() requires a read-side critical section to order against. In practice, preemption can be thought of as being disabled across every machine instruction, at least for those machine instructions that are not in the idle loop and not on offline CPUs. So this commit removes the redundant preempt_disable() from rcu_note_voluntary_context_switch(). Please note that the single instruction in question is the store of zero to ->rcu_tasks_holdout. The "if" is simply a performance optimization that avoids unnecessary stores. To see this, keep in mind that both the "if" condition and the store are in a quiescent state. Therefore, even if the task is preempted for a full grace period (presumably due to its having done a context switch beforehand), the store will be recording a legitimate quiescent state. Reported-by: Lai Jiangshan Signed-off-by: Paul E. McKenney Conflicts: include/linux/rcupdate.h --- include/linux/rcupdate.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index a3123f53a4ce..132e1e34cdca 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -326,10 +326,8 @@ static inline void rcu_user_hooks_switch(struct task_struct *prev, extern struct srcu_struct tasks_rcu_exit_srcu; #define rcu_note_voluntary_context_switch(t) \ do { \ - preempt_disable(); /* Exclude synchronize_sched(); */ \ if (ACCESS_ONCE((t)->rcu_tasks_holdout)) \ ACCESS_ONCE((t)->rcu_tasks_holdout) = false; \ - preempt_enable(); \ } while (0) #else /* #ifdef CONFIG_TASKS_RCU */ #define TASKS_RCU(x) do { } while (0) -- cgit v1.2.3 From 1d082fd061884a587c490c4fc8a2056ce1e47624 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 14 Aug 2014 16:01:53 -0700 Subject: rcu: Remove local_irq_disable() in rcu_preempt_note_context_switch() The rcu_preempt_note_context_switch() function is on a scheduling fast path, so it would be good to avoid disabling irqs. The reason that irqs are disabled is to synchronize process-level and irq-handler access to the task_struct ->rcu_read_unlock_special bitmask. This commit therefore makes ->rcu_read_unlock_special instead be a union of bools with a short allowing single-access checks in RCU's __rcu_read_unlock(). This results in the process-level and irq-handler accesses being simple loads and stores, so that irqs need no longer be disabled. This commit therefore removes the irq disabling from rcu_preempt_note_context_switch(). 
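A minimal stand-alone C sketch of the union layout described above (illustrative only: the demo_* names and the main() harness are not from the patch, and it assumes, as the kernel code does, that the two bool flags pack into the same storage as the short):

#include <stdbool.h>
#include <stdio.h>

union rcu_special_demo {
	struct {
		bool blocked;	/* task blocked in an RCU read-side section */
		bool need_qs;	/* RCU core wants a quiescent state */
	} b;
	short s;		/* one plain load/store covers both flags */
};

int main(void)
{
	union rcu_special_demo special = { .s = 0 };

	/* An irq handler can set one flag with a simple byte store... */
	special.b.need_qs = true;

	/* ...and the unlock fast path can test both flags at once with a
	 * single load of the overlapping short, with no irq disabling. */
	if (special.s != 0)
		printf("deferred RCU-unlock work pending\n");

	return 0;
}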
Reported-by: Peter Zijlstra Signed-off-by: Paul E. McKenney --- include/linux/init_task.h | 2 +- include/linux/sched.h | 16 +++++++++------- kernel/rcu/tree_plugin.h | 32 +++++++++++++++----------------- kernel/rcu/update.c | 2 +- 4 files changed, 26 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 03b274873b06..77fc43f8fb72 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -111,7 +111,7 @@ extern struct group_info init_groups; #ifdef CONFIG_PREEMPT_RCU #define INIT_TASK_RCU_PREEMPT(tsk) \ .rcu_read_lock_nesting = 0, \ - .rcu_read_unlock_special = 0, \ + .rcu_read_unlock_special.s = 0, \ .rcu_node_entry = LIST_HEAD_INIT(tsk.rcu_node_entry), \ INIT_TASK_RCU_TREE_PREEMPT() #else diff --git a/include/linux/sched.h b/include/linux/sched.h index ec8b34722bcc..42888d715fb1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1212,6 +1212,13 @@ struct sched_dl_entity { struct hrtimer dl_timer; }; +union rcu_special { + struct { + bool blocked; + bool need_qs; + } b; + short s; +}; struct rcu_node; enum perf_event_task_context { @@ -1264,7 +1271,7 @@ struct task_struct { #ifdef CONFIG_PREEMPT_RCU int rcu_read_lock_nesting; - char rcu_read_unlock_special; + union rcu_special rcu_read_unlock_special; struct list_head rcu_node_entry; #endif /* #ifdef CONFIG_PREEMPT_RCU */ #ifdef CONFIG_TREE_PREEMPT_RCU @@ -2005,16 +2012,11 @@ extern void task_clear_jobctl_trapping(struct task_struct *task); extern void task_clear_jobctl_pending(struct task_struct *task, unsigned int mask); -#ifdef CONFIG_PREEMPT_RCU -#define RCU_READ_UNLOCK_BLOCKED (1 << 0) /* blocked while in RCU read-side. */ -#define RCU_READ_UNLOCK_NEED_QS (1 << 1) /* RCU core needs CPU response. */ -#endif /* #ifdef CONFIG_PREEMPT_RCU */ - static inline void rcu_copy_process(struct task_struct *p) { #ifdef CONFIG_PREEMPT_RCU p->rcu_read_lock_nesting = 0; - p->rcu_read_unlock_special = 0; + p->rcu_read_unlock_special.s = 0; p->rcu_blocked_node = NULL; INIT_LIST_HEAD(&p->rcu_node_entry); #endif /* #ifdef CONFIG_PREEMPT_RCU */ diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index e466b40052a7..0981c0cd70fe 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -155,9 +155,8 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed); * not in a quiescent state. There might be any number of tasks blocked * while in an RCU read-side critical section. * - * Unlike the other rcu_*_qs() functions, callers to this function - * must disable irqs in order to protect the assignment to - * ->rcu_read_unlock_special. + * As with the other rcu_*_qs() functions, callers to this function + * must disable preemption. */ static void rcu_preempt_qs(int cpu) { @@ -166,7 +165,7 @@ static void rcu_preempt_qs(int cpu) if (rdp->passed_quiesce == 0) trace_rcu_grace_period(TPS("rcu_preempt"), rdp->gpnum, TPS("cpuqs")); rdp->passed_quiesce = 1; - current->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_NEED_QS; + current->rcu_read_unlock_special.b.need_qs = false; } /* @@ -190,14 +189,14 @@ static void rcu_preempt_note_context_switch(int cpu) struct rcu_node *rnp; if (t->rcu_read_lock_nesting > 0 && - (t->rcu_read_unlock_special & RCU_READ_UNLOCK_BLOCKED) == 0) { + !t->rcu_read_unlock_special.b.blocked) { /* Possibly blocking in an RCU read-side critical section. 
*/ rdp = per_cpu_ptr(rcu_preempt_state.rda, cpu); rnp = rdp->mynode; raw_spin_lock_irqsave(&rnp->lock, flags); smp_mb__after_unlock_lock(); - t->rcu_read_unlock_special |= RCU_READ_UNLOCK_BLOCKED; + t->rcu_read_unlock_special.b.blocked = true; t->rcu_blocked_node = rnp; /* @@ -239,7 +238,7 @@ static void rcu_preempt_note_context_switch(int cpu) : rnp->gpnum + 1); raw_spin_unlock_irqrestore(&rnp->lock, flags); } else if (t->rcu_read_lock_nesting < 0 && - t->rcu_read_unlock_special) { + t->rcu_read_unlock_special.s) { /* * Complete exit from RCU read-side critical section on @@ -257,9 +256,7 @@ static void rcu_preempt_note_context_switch(int cpu) * grace period, then the fact that the task has been enqueued * means that we continue to block the current grace period. */ - local_irq_save(flags); rcu_preempt_qs(cpu); - local_irq_restore(flags); } /* @@ -340,7 +337,7 @@ void rcu_read_unlock_special(struct task_struct *t) bool drop_boost_mutex = false; #endif /* #ifdef CONFIG_RCU_BOOST */ struct rcu_node *rnp; - int special; + union rcu_special special; /* NMI handlers cannot block and cannot safely manipulate state. */ if (in_nmi()) @@ -350,12 +347,13 @@ void rcu_read_unlock_special(struct task_struct *t) /* * If RCU core is waiting for this CPU to exit critical section, - * let it know that we have done so. + * let it know that we have done so. Because irqs are disabled, + * t->rcu_read_unlock_special cannot change. */ special = t->rcu_read_unlock_special; - if (special & RCU_READ_UNLOCK_NEED_QS) { + if (special.b.need_qs) { rcu_preempt_qs(smp_processor_id()); - if (!t->rcu_read_unlock_special) { + if (!t->rcu_read_unlock_special.s) { local_irq_restore(flags); return; } @@ -368,8 +366,8 @@ void rcu_read_unlock_special(struct task_struct *t) } /* Clean up if blocked during RCU read-side critical section. */ - if (special & RCU_READ_UNLOCK_BLOCKED) { - t->rcu_read_unlock_special &= ~RCU_READ_UNLOCK_BLOCKED; + if (special.b.blocked) { + t->rcu_read_unlock_special.b.blocked = false; /* * Remove this task from the list it blocked on. The @@ -658,7 +656,7 @@ static void rcu_preempt_check_callbacks(int cpu) } if (t->rcu_read_lock_nesting > 0 && per_cpu(rcu_preempt_data, cpu).qs_pending) - t->rcu_read_unlock_special |= RCU_READ_UNLOCK_NEED_QS; + t->rcu_read_unlock_special.b.need_qs = true; } #ifdef CONFIG_RCU_BOOST @@ -941,7 +939,7 @@ void exit_rcu(void) return; t->rcu_read_lock_nesting = 1; barrier(); - t->rcu_read_unlock_special = RCU_READ_UNLOCK_BLOCKED; + t->rcu_read_unlock_special.b.blocked = true; __rcu_read_unlock(); } diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index 9487b4898e51..6fb911558562 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -93,7 +93,7 @@ void __rcu_read_unlock(void) barrier(); /* critical section before exit code. */ t->rcu_read_lock_nesting = INT_MIN; barrier(); /* assign before ->rcu_read_unlock_special load */ - if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special))) + if (unlikely(ACCESS_ONCE(t->rcu_read_unlock_special.s))) rcu_read_unlock_special(t); barrier(); /* ->rcu_read_unlock_special load before assign */ t->rcu_read_lock_nesting = 0; -- cgit v1.2.3 From 284a8c93af47306beed967a303d84730b32bab39 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 14 Aug 2014 16:38:46 -0700 Subject: rcu: Per-CPU operation cleanups to rcu_*_qs() functions The rcu_bh_qs(), rcu_preempt_qs(), and rcu_sched_qs() functions use old-style per-CPU variable access and write to ->passed_quiesce even if it is already set. 
This commit therefore updates to use the new-style per-CPU variable access functions and avoids the spurious writes. This commit also eliminates the "cpu" argument to these functions because they are always invoked on the indicated CPU. Reported-by: Peter Zijlstra Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 4 ++-- include/linux/rcutiny.h | 2 +- kernel/rcu/tiny.c | 10 +++++----- kernel/rcu/tree.c | 34 ++++++++++++++++++---------------- kernel/rcu/tree_plugin.h | 27 +++++++++++++++------------ kernel/softirq.c | 2 +- 6 files changed, 42 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 132e1e34cdca..2fab0e37afe0 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -261,8 +261,8 @@ static inline int rcu_preempt_depth(void) /* Internal to kernel */ void rcu_init(void); -void rcu_sched_qs(int cpu); -void rcu_bh_qs(int cpu); +void rcu_sched_qs(void); +void rcu_bh_qs(void); void rcu_check_callbacks(int cpu, int user); struct notifier_block; void rcu_idle_enter(void); diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index d40a6a451330..38cc5b1e252d 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -80,7 +80,7 @@ static inline void kfree_call_rcu(struct rcu_head *head, static inline void rcu_note_context_switch(int cpu) { - rcu_sched_qs(cpu); + rcu_sched_qs(); } /* diff --git a/kernel/rcu/tiny.c b/kernel/rcu/tiny.c index 717f00854fc0..61b8d2ccc2cb 100644 --- a/kernel/rcu/tiny.c +++ b/kernel/rcu/tiny.c @@ -72,7 +72,7 @@ static void rcu_idle_enter_common(long long newval) current->pid, current->comm, idle->pid, idle->comm); /* must be idle task! */ } - rcu_sched_qs(0); /* implies rcu_bh_qsctr_inc(0) */ + rcu_sched_qs(); /* implies rcu_bh_inc() */ barrier(); rcu_dynticks_nesting = newval; } @@ -217,7 +217,7 @@ static int rcu_qsctr_help(struct rcu_ctrlblk *rcp) * are at it, given that any rcu quiescent state is also an rcu_bh * quiescent state. Use "+" instead of "||" to defeat short circuiting. */ -void rcu_sched_qs(int cpu) +void rcu_sched_qs(void) { unsigned long flags; @@ -231,7 +231,7 @@ void rcu_sched_qs(int cpu) /* * Record an rcu_bh quiescent state. */ -void rcu_bh_qs(int cpu) +void rcu_bh_qs(void) { unsigned long flags; @@ -251,9 +251,9 @@ void rcu_check_callbacks(int cpu, int user) { RCU_TRACE(check_cpu_stalls()); if (user || rcu_is_cpu_rrupt_from_idle()) - rcu_sched_qs(cpu); + rcu_sched_qs(); else if (!in_softirq()) - rcu_bh_qs(cpu); + rcu_bh_qs(); if (user) rcu_note_voluntary_context_switch(current); } diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index c880f5387b1f..4c340625ffd4 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -188,22 +188,24 @@ static int rcu_gp_in_progress(struct rcu_state *rsp) * one since the start of the grace period, this just sets a flag. * The caller must have disabled preemption. 
*/ -void rcu_sched_qs(int cpu) +void rcu_sched_qs(void) { - struct rcu_data *rdp = &per_cpu(rcu_sched_data, cpu); - - if (rdp->passed_quiesce == 0) - trace_rcu_grace_period(TPS("rcu_sched"), rdp->gpnum, TPS("cpuqs")); - rdp->passed_quiesce = 1; + if (!__this_cpu_read(rcu_sched_data.passed_quiesce)) { + trace_rcu_grace_period(TPS("rcu_sched"), + __this_cpu_read(rcu_sched_data.gpnum), + TPS("cpuqs")); + __this_cpu_write(rcu_sched_data.passed_quiesce, 1); + } } -void rcu_bh_qs(int cpu) +void rcu_bh_qs(void) { - struct rcu_data *rdp = &per_cpu(rcu_bh_data, cpu); - - if (rdp->passed_quiesce == 0) - trace_rcu_grace_period(TPS("rcu_bh"), rdp->gpnum, TPS("cpuqs")); - rdp->passed_quiesce = 1; + if (!__this_cpu_read(rcu_bh_data.passed_quiesce)) { + trace_rcu_grace_period(TPS("rcu_bh"), + __this_cpu_read(rcu_bh_data.gpnum), + TPS("cpuqs")); + __this_cpu_write(rcu_bh_data.passed_quiesce, 1); + } } static DEFINE_PER_CPU(int, rcu_sched_qs_mask); @@ -278,7 +280,7 @@ static void rcu_momentary_dyntick_idle(void) void rcu_note_context_switch(int cpu) { trace_rcu_utilization(TPS("Start context switch")); - rcu_sched_qs(cpu); + rcu_sched_qs(); rcu_preempt_note_context_switch(cpu); if (unlikely(raw_cpu_read(rcu_sched_qs_mask))) rcu_momentary_dyntick_idle(); @@ -2395,8 +2397,8 @@ void rcu_check_callbacks(int cpu, int user) * at least not while the corresponding CPU is online. */ - rcu_sched_qs(cpu); - rcu_bh_qs(cpu); + rcu_sched_qs(); + rcu_bh_qs(); } else if (!in_softirq()) { @@ -2407,7 +2409,7 @@ void rcu_check_callbacks(int cpu, int user) * critical section, so note it. */ - rcu_bh_qs(cpu); + rcu_bh_qs(); } rcu_preempt_check_callbacks(cpu); if (rcu_pending(cpu)) diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 0981c0cd70fe..25e692a36280 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -158,14 +158,16 @@ EXPORT_SYMBOL_GPL(rcu_batches_completed); * As with the other rcu_*_qs() functions, callers to this function * must disable preemption. */ -static void rcu_preempt_qs(int cpu) -{ - struct rcu_data *rdp = &per_cpu(rcu_preempt_data, cpu); - - if (rdp->passed_quiesce == 0) - trace_rcu_grace_period(TPS("rcu_preempt"), rdp->gpnum, TPS("cpuqs")); - rdp->passed_quiesce = 1; - current->rcu_read_unlock_special.b.need_qs = false; +static void rcu_preempt_qs(void) +{ + if (!__this_cpu_read(rcu_preempt_data.passed_quiesce)) { + trace_rcu_grace_period(TPS("rcu_preempt"), + __this_cpu_read(rcu_preempt_data.gpnum), + TPS("cpuqs")); + __this_cpu_write(rcu_preempt_data.passed_quiesce, 1); + barrier(); /* Coordinate with rcu_preempt_check_callbacks(). */ + current->rcu_read_unlock_special.b.need_qs = false; + } } /* @@ -256,7 +258,7 @@ static void rcu_preempt_note_context_switch(int cpu) * grace period, then the fact that the task has been enqueued * means that we continue to block the current grace period. 
*/ - rcu_preempt_qs(cpu); + rcu_preempt_qs(); } /* @@ -352,7 +354,7 @@ void rcu_read_unlock_special(struct task_struct *t) */ special = t->rcu_read_unlock_special; if (special.b.need_qs) { - rcu_preempt_qs(smp_processor_id()); + rcu_preempt_qs(); if (!t->rcu_read_unlock_special.s) { local_irq_restore(flags); return; @@ -651,11 +653,12 @@ static void rcu_preempt_check_callbacks(int cpu) struct task_struct *t = current; if (t->rcu_read_lock_nesting == 0) { - rcu_preempt_qs(cpu); + rcu_preempt_qs(); return; } if (t->rcu_read_lock_nesting > 0 && - per_cpu(rcu_preempt_data, cpu).qs_pending) + per_cpu(rcu_preempt_data, cpu).qs_pending && + !per_cpu(rcu_preempt_data, cpu).passed_quiesce) t->rcu_read_unlock_special.b.need_qs = true; } diff --git a/kernel/softirq.c b/kernel/softirq.c index 5918d227730f..348ec763b104 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -278,7 +278,7 @@ restart: pending >>= softirq_bit; } - rcu_bh_qs(smp_processor_id()); + rcu_bh_qs(); local_irq_disable(); pending = local_softirq_pending(); -- cgit v1.2.3 From 908c7f1949cb7cc6e92ba8f18f2998e87e265b8e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 09:51:29 +0900 Subject: percpu_counter: add @gfp to percpu_counter_init() Percpu allocator now supports allocation mask. Add @gfp to percpu_counter_init() so that !GFP_KERNEL allocation masks can be used with percpu_counters too. We could have left percpu_counter_init() alone and added percpu_counter_init_gfp(); however, the number of users isn't that high and introducing _gfp variants to all percpu data structures would be quite ugly, so let's just do the conversion. This is the one with the most users. Other percpu data structures are a lot easier to convert. This patch doesn't make any functional difference. Signed-off-by: Tejun Heo Acked-by: Jan Kara Acked-by: "David S. 
Miller" Cc: x86@kernel.org Cc: Jens Axboe Cc: "Theodore Ts'o" Cc: Alexander Viro Cc: Andrew Morton --- arch/x86/kvm/mmu.c | 2 +- fs/btrfs/disk-io.c | 8 ++++---- fs/btrfs/extent-tree.c | 2 +- fs/ext2/super.c | 6 +++--- fs/ext3/super.c | 6 +++--- fs/ext4/super.c | 14 +++++++++----- fs/file_table.c | 2 +- fs/quota/dquot.c | 2 +- fs/super.c | 3 ++- include/linux/percpu_counter.h | 10 ++++++---- include/net/dst_ops.h | 2 +- include/net/inet_frag.h | 2 +- lib/flex_proportions.c | 4 ++-- lib/percpu_counter.c | 4 ++-- lib/proportions.c | 6 +++--- mm/backing-dev.c | 2 +- mm/mmap.c | 2 +- mm/nommu.c | 2 +- mm/shmem.c | 2 +- net/dccp/proto.c | 2 +- net/ipv4/tcp.c | 4 ++-- net/ipv4/tcp_memcontrol.c | 2 +- net/sctp/protocol.c | 2 +- 23 files changed, 49 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 931467881da7..5bd53f206f4f 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4534,7 +4534,7 @@ int kvm_mmu_module_init(void) if (!mmu_page_header_cache) goto nomem; - if (percpu_counter_init(&kvm_total_used_mmu_pages, 0)) + if (percpu_counter_init(&kvm_total_used_mmu_pages, 0, GFP_KERNEL)) goto nomem; register_shrinker(&mmu_shrinker); diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 08e65e9cf2aa..61dae01788d7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1180,7 +1180,7 @@ static struct btrfs_subvolume_writers *btrfs_alloc_subvolume_writers(void) if (!writers) return ERR_PTR(-ENOMEM); - ret = percpu_counter_init(&writers->counter, 0); + ret = percpu_counter_init(&writers->counter, 0, GFP_KERNEL); if (ret < 0) { kfree(writers); return ERR_PTR(ret); @@ -2185,7 +2185,7 @@ int open_ctree(struct super_block *sb, goto fail_srcu; } - ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0); + ret = percpu_counter_init(&fs_info->dirty_metadata_bytes, 0, GFP_KERNEL); if (ret) { err = ret; goto fail_bdi; @@ -2193,13 +2193,13 @@ int open_ctree(struct super_block *sb, fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE * (1 + ilog2(nr_cpu_ids)); - ret = percpu_counter_init(&fs_info->delalloc_bytes, 0); + ret = percpu_counter_init(&fs_info->delalloc_bytes, 0, GFP_KERNEL); if (ret) { err = ret; goto fail_dirty_metadata_bytes; } - ret = percpu_counter_init(&fs_info->bio_counter, 0); + ret = percpu_counter_init(&fs_info->bio_counter, 0, GFP_KERNEL); if (ret) { err = ret; goto fail_delalloc_bytes; diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 813537f362f9..94ec71eda86b 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -3493,7 +3493,7 @@ static int update_space_info(struct btrfs_fs_info *info, u64 flags, if (!found) return -ENOMEM; - ret = percpu_counter_init(&found->total_bytes_pinned, 0); + ret = percpu_counter_init(&found->total_bytes_pinned, 0, GFP_KERNEL); if (ret) { kfree(found); return ret; diff --git a/fs/ext2/super.c b/fs/ext2/super.c index b88edc05c230..170dc41e8bf4 100644 --- a/fs/ext2/super.c +++ b/fs/ext2/super.c @@ -1067,14 +1067,14 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent) ext2_rsv_window_add(sb, &sbi->s_rsv_window_head); err = percpu_counter_init(&sbi->s_freeblocks_counter, - ext2_count_free_blocks(sb)); + ext2_count_free_blocks(sb), GFP_KERNEL); if (!err) { err = percpu_counter_init(&sbi->s_freeinodes_counter, - ext2_count_free_inodes(sb)); + ext2_count_free_inodes(sb), GFP_KERNEL); } if (!err) { err = percpu_counter_init(&sbi->s_dirs_counter, - ext2_count_dirs(sb)); + ext2_count_dirs(sb), GFP_KERNEL); } if (err) { ext2_msg(sb, 
KERN_ERR, "error: insufficient memory"); diff --git a/fs/ext3/super.c b/fs/ext3/super.c index 08cdfe5461e3..eba021b88440 100644 --- a/fs/ext3/super.c +++ b/fs/ext3/super.c @@ -2039,14 +2039,14 @@ static int ext3_fill_super (struct super_block *sb, void *data, int silent) goto failed_mount2; } err = percpu_counter_init(&sbi->s_freeblocks_counter, - ext3_count_free_blocks(sb)); + ext3_count_free_blocks(sb), GFP_KERNEL); if (!err) { err = percpu_counter_init(&sbi->s_freeinodes_counter, - ext3_count_free_inodes(sb)); + ext3_count_free_inodes(sb), GFP_KERNEL); } if (!err) { err = percpu_counter_init(&sbi->s_dirs_counter, - ext3_count_dirs(sb)); + ext3_count_dirs(sb), GFP_KERNEL); } if (err) { ext3_msg(sb, KERN_ERR, "error: insufficient memory"); diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 32b43ad154b9..e25ca8fdde7d 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -3891,7 +3891,8 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent) /* Register extent status tree shrinker */ ext4_es_register_shrinker(sbi); - if ((err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0)) != 0) { + err = percpu_counter_init(&sbi->s_extent_cache_cnt, 0, GFP_KERNEL); + if (err) { ext4_msg(sb, KERN_ERR, "insufficient memory"); goto failed_mount3; } @@ -4105,17 +4106,20 @@ no_journal: block = ext4_count_free_clusters(sb); ext4_free_blocks_count_set(sbi->s_es, EXT4_C2B(sbi, block)); - err = percpu_counter_init(&sbi->s_freeclusters_counter, block); + err = percpu_counter_init(&sbi->s_freeclusters_counter, block, + GFP_KERNEL); if (!err) { unsigned long freei = ext4_count_free_inodes(sb); sbi->s_es->s_free_inodes_count = cpu_to_le32(freei); - err = percpu_counter_init(&sbi->s_freeinodes_counter, freei); + err = percpu_counter_init(&sbi->s_freeinodes_counter, freei, + GFP_KERNEL); } if (!err) err = percpu_counter_init(&sbi->s_dirs_counter, - ext4_count_dirs(sb)); + ext4_count_dirs(sb), GFP_KERNEL); if (!err) - err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0); + err = percpu_counter_init(&sbi->s_dirtyclusters_counter, 0, + GFP_KERNEL); if (err) { ext4_msg(sb, KERN_ERR, "insufficient memory"); goto failed_mount6; diff --git a/fs/file_table.c b/fs/file_table.c index 385bfd31512a..0bab12b20460 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -331,5 +331,5 @@ void __init files_init(unsigned long mempages) n = (mempages * (PAGE_SIZE / 1024)) / 10; files_stat.max_files = max_t(unsigned long, n, NR_FILE); - percpu_counter_init(&nr_files, 0); + percpu_counter_init(&nr_files, 0, GFP_KERNEL); } diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index f2d0eee9d1f1..8b663b2d9562 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2725,7 +2725,7 @@ static int __init dquot_init(void) panic("Cannot create dquot hash table"); for (i = 0; i < _DQST_DQSTAT_LAST; i++) { - ret = percpu_counter_init(&dqstats.counter[i], 0); + ret = percpu_counter_init(&dqstats.counter[i], 0, GFP_KERNEL); if (ret) panic("Cannot create dquot stat counters"); } diff --git a/fs/super.c b/fs/super.c index b9a214d2fe98..1b836107acee 100644 --- a/fs/super.c +++ b/fs/super.c @@ -175,7 +175,8 @@ static struct super_block *alloc_super(struct file_system_type *type, int flags) goto fail; for (i = 0; i < SB_FREEZE_LEVELS; i++) { - if (percpu_counter_init(&s->s_writers.counter[i], 0) < 0) + if (percpu_counter_init(&s->s_writers.counter[i], 0, + GFP_KERNEL) < 0) goto fail; lockdep_init_map(&s->s_writers.lock_map[i], sb_writers_name[i], &type->s_writers_key[i], 0); diff --git a/include/linux/percpu_counter.h 
b/include/linux/percpu_counter.h index d5dd4657c8d6..50e50095c8d1 100644 --- a/include/linux/percpu_counter.h +++ b/include/linux/percpu_counter.h @@ -12,6 +12,7 @@ #include #include #include +#include #ifdef CONFIG_SMP @@ -26,14 +27,14 @@ struct percpu_counter { extern int percpu_counter_batch; -int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, +int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp, struct lock_class_key *key); -#define percpu_counter_init(fbc, value) \ +#define percpu_counter_init(fbc, value, gfp) \ ({ \ static struct lock_class_key __key; \ \ - __percpu_counter_init(fbc, value, &__key); \ + __percpu_counter_init(fbc, value, gfp, &__key); \ }) void percpu_counter_destroy(struct percpu_counter *fbc); @@ -89,7 +90,8 @@ struct percpu_counter { s64 count; }; -static inline int percpu_counter_init(struct percpu_counter *fbc, s64 amount) +static inline int percpu_counter_init(struct percpu_counter *fbc, s64 amount, + gfp_t gfp) { fbc->count = amount; return 0; diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 2f26dfb8450e..1f99a1de0e4f 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -63,7 +63,7 @@ static inline void dst_entries_add(struct dst_ops *dst, int val) static inline int dst_entries_init(struct dst_ops *dst) { - return percpu_counter_init(&dst->pcpuc_entries, 0); + return percpu_counter_init(&dst->pcpuc_entries, 0, GFP_KERNEL); } static inline void dst_entries_destroy(struct dst_ops *dst) diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 65a8855e99fe..8d1765577acc 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -151,7 +151,7 @@ static inline void add_frag_mem_limit(struct inet_frag_queue *q, int i) static inline void init_frag_mem_limit(struct netns_frags *nf) { - percpu_counter_init(&nf->mem, 0); + percpu_counter_init(&nf->mem, 0, GFP_KERNEL); } static inline unsigned int sum_frag_mem_limit(struct netns_frags *nf) diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c index ebf3bac460b0..b9d026bfcf38 100644 --- a/lib/flex_proportions.c +++ b/lib/flex_proportions.c @@ -40,7 +40,7 @@ int fprop_global_init(struct fprop_global *p) p->period = 0; /* Use 1 to avoid dealing with periods with 0 events... 
*/ - err = percpu_counter_init(&p->events, 1); + err = percpu_counter_init(&p->events, 1, GFP_KERNEL); if (err) return err; seqcount_init(&p->sequence); @@ -172,7 +172,7 @@ int fprop_local_init_percpu(struct fprop_local_percpu *pl) { int err; - err = percpu_counter_init(&pl->events, 0); + err = percpu_counter_init(&pl->events, 0, GFP_KERNEL); if (err) return err; pl->period = 0; diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 3fde78275cd1..48144cdae819 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -112,7 +112,7 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc) } EXPORT_SYMBOL(__percpu_counter_sum); -int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, +int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, gfp_t gfp, struct lock_class_key *key) { unsigned long flags __maybe_unused; @@ -120,7 +120,7 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount, raw_spin_lock_init(&fbc->lock); lockdep_set_class(&fbc->lock, key); fbc->count = amount; - fbc->counters = alloc_percpu(s32); + fbc->counters = alloc_percpu_gfp(s32, gfp); if (!fbc->counters) return -ENOMEM; diff --git a/lib/proportions.c b/lib/proportions.c index 05df84801b56..ca95f8d54384 100644 --- a/lib/proportions.c +++ b/lib/proportions.c @@ -83,11 +83,11 @@ int prop_descriptor_init(struct prop_descriptor *pd, int shift) pd->index = 0; pd->pg[0].shift = shift; mutex_init(&pd->mutex); - err = percpu_counter_init(&pd->pg[0].events, 0); + err = percpu_counter_init(&pd->pg[0].events, 0, GFP_KERNEL); if (err) goto out; - err = percpu_counter_init(&pd->pg[1].events, 0); + err = percpu_counter_init(&pd->pg[1].events, 0, GFP_KERNEL); if (err) percpu_counter_destroy(&pd->pg[0].events); @@ -193,7 +193,7 @@ int prop_local_init_percpu(struct prop_local_percpu *pl) raw_spin_lock_init(&pl->lock); pl->shift = 0; pl->period = 0; - return percpu_counter_init(&pl->events, 0); + return percpu_counter_init(&pl->events, 0, GFP_KERNEL); } void prop_local_destroy_percpu(struct prop_local_percpu *pl) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index 1706cbbdf5f0..f19a818be2d3 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -455,7 +455,7 @@ int bdi_init(struct backing_dev_info *bdi) bdi_wb_init(&bdi->wb, bdi); for (i = 0; i < NR_BDI_STAT_ITEMS; i++) { - err = percpu_counter_init(&bdi->bdi_stat[i], 0); + err = percpu_counter_init(&bdi->bdi_stat[i], 0, GFP_KERNEL); if (err) goto err; } diff --git a/mm/mmap.c b/mm/mmap.c index c1f2ea4a0b99..d7ec93e25fa1 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3196,7 +3196,7 @@ void __init mmap_init(void) { int ret; - ret = percpu_counter_init(&vm_committed_as, 0); + ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL); VM_BUG_ON(ret); } diff --git a/mm/nommu.c b/mm/nommu.c index a881d9673c6b..bd1808e194a7 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -539,7 +539,7 @@ void __init mmap_init(void) { int ret; - ret = percpu_counter_init(&vm_committed_as, 0); + ret = percpu_counter_init(&vm_committed_as, 0, GFP_KERNEL); VM_BUG_ON(ret); vm_region_jar = KMEM_CACHE(vm_region, SLAB_PANIC); } diff --git a/mm/shmem.c b/mm/shmem.c index 0e5fb225007c..d4bc55d3f107 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -2993,7 +2993,7 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) #endif spin_lock_init(&sbinfo->stat_lock); - if (percpu_counter_init(&sbinfo->used_blocks, 0)) + if (percpu_counter_init(&sbinfo->used_blocks, 0, GFP_KERNEL)) goto failed; sbinfo->free_inodes = sbinfo->max_inodes; diff --git a/net/dccp/proto.c 
b/net/dccp/proto.c index de2c1e719305..e421eddf67b4 100644 --- a/net/dccp/proto.c +++ b/net/dccp/proto.c @@ -1115,7 +1115,7 @@ static int __init dccp_init(void) BUILD_BUG_ON(sizeof(struct dccp_skb_cb) > FIELD_SIZEOF(struct sk_buff, cb)); - rc = percpu_counter_init(&dccp_orphan_count, 0); + rc = percpu_counter_init(&dccp_orphan_count, 0, GFP_KERNEL); if (rc) goto out_fail; rc = -ENOBUFS; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 541f26a67ba2..d59c2604c247 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3188,8 +3188,8 @@ void __init tcp_init(void) BUILD_BUG_ON(sizeof(struct tcp_skb_cb) > sizeof(skb->cb)); - percpu_counter_init(&tcp_sockets_allocated, 0); - percpu_counter_init(&tcp_orphan_count, 0); + percpu_counter_init(&tcp_sockets_allocated, 0, GFP_KERNEL); + percpu_counter_init(&tcp_orphan_count, 0, GFP_KERNEL); tcp_hashinfo.bind_bucket_cachep = kmem_cache_create("tcp_bind_bucket", sizeof(struct inet_bind_bucket), 0, diff --git a/net/ipv4/tcp_memcontrol.c b/net/ipv4/tcp_memcontrol.c index 3af522622fad..1d191357bf88 100644 --- a/net/ipv4/tcp_memcontrol.c +++ b/net/ipv4/tcp_memcontrol.c @@ -32,7 +32,7 @@ int tcp_init_cgroup(struct mem_cgroup *memcg, struct cgroup_subsys *ss) res_parent = &parent_cg->memory_allocated; res_counter_init(&cg_proto->memory_allocated, res_parent); - percpu_counter_init(&cg_proto->sockets_allocated, 0); + percpu_counter_init(&cg_proto->sockets_allocated, 0, GFP_KERNEL); return 0; } diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 6240834f4b95..f00a85a3fddd 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1341,7 +1341,7 @@ static __init int sctp_init(void) if (!sctp_chunk_cachep) goto err_chunk_cachep; - status = percpu_counter_init(&sctp_sockets_allocated, 0); + status = percpu_counter_init(&sctp_sockets_allocated, 0, GFP_KERNEL); if (status) goto err_percpu_counter_init; -- cgit v1.2.3 From 20ae00792c6f1f18fc4fc5965445a145df92827e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 09:51:30 +0900 Subject: proportions: add @gfp to init functions Percpu allocator now supports allocation mask. Add @gfp to [flex_]proportions init functions so that !GFP_KERNEL allocation masks can be used with them too. This patch doesn't make any functional difference. 
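A rough sketch of what the new @gfp argument buys callers (the demo_* structure and function are hypothetical; only the [f]prop init/destroy signatures come from this series): a flex_proportions counter can now be set up from a context that must not sleep by passing GFP_NOWAIT instead of GFP_KERNEL.

#include <linux/flex_proportions.h>
#include <linux/gfp.h>

struct demo_writeback_state {		/* hypothetical embedding */
	struct fprop_global completions;
	struct fprop_local_percpu done;
};

static int demo_state_init(struct demo_writeback_state *st)
{
	int err;

	/* GFP_KERNEL is still fine where sleeping is allowed... */
	err = fprop_global_init(&st->completions, GFP_KERNEL);
	if (err)
		return err;

	/* ...while e.g. GFP_NOWAIT now works where the caller cannot sleep. */
	err = fprop_local_init_percpu(&st->done, GFP_NOWAIT);
	if (err)
		fprop_global_destroy(&st->completions);

	return err;
}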
Signed-off-by: Tejun Heo Reviewed-by: Jan Kara Cc: Peter Zijlstra --- include/linux/flex_proportions.h | 5 +++-- include/linux/proportions.h | 5 +++-- lib/flex_proportions.c | 8 ++++---- lib/proportions.c | 10 +++++----- mm/backing-dev.c | 2 +- mm/page-writeback.c | 2 +- 6 files changed, 17 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/flex_proportions.h b/include/linux/flex_proportions.h index 4ebc49fae391..0d348e011a6e 100644 --- a/include/linux/flex_proportions.h +++ b/include/linux/flex_proportions.h @@ -10,6 +10,7 @@ #include #include #include +#include /* * When maximum proportion of some event type is specified, this is the @@ -32,7 +33,7 @@ struct fprop_global { seqcount_t sequence; }; -int fprop_global_init(struct fprop_global *p); +int fprop_global_init(struct fprop_global *p, gfp_t gfp); void fprop_global_destroy(struct fprop_global *p); bool fprop_new_period(struct fprop_global *p, int periods); @@ -79,7 +80,7 @@ struct fprop_local_percpu { raw_spinlock_t lock; /* Protect period and numerator */ }; -int fprop_local_init_percpu(struct fprop_local_percpu *pl); +int fprop_local_init_percpu(struct fprop_local_percpu *pl, gfp_t gfp); void fprop_local_destroy_percpu(struct fprop_local_percpu *pl); void __fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl); void __fprop_inc_percpu_max(struct fprop_global *p, struct fprop_local_percpu *pl, diff --git a/include/linux/proportions.h b/include/linux/proportions.h index 26a8a4ed9b07..00e8e8fa7358 100644 --- a/include/linux/proportions.h +++ b/include/linux/proportions.h @@ -12,6 +12,7 @@ #include #include #include +#include struct prop_global { /* @@ -40,7 +41,7 @@ struct prop_descriptor { struct mutex mutex; /* serialize the prop_global switch */ }; -int prop_descriptor_init(struct prop_descriptor *pd, int shift); +int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp); void prop_change_shift(struct prop_descriptor *pd, int new_shift); /* @@ -61,7 +62,7 @@ struct prop_local_percpu { raw_spinlock_t lock; /* protect the snapshot state */ }; -int prop_local_init_percpu(struct prop_local_percpu *pl); +int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp); void prop_local_destroy_percpu(struct prop_local_percpu *pl); void __prop_inc_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl); void prop_fraction_percpu(struct prop_descriptor *pd, struct prop_local_percpu *pl, diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c index b9d026bfcf38..8f25652f40d4 100644 --- a/lib/flex_proportions.c +++ b/lib/flex_proportions.c @@ -34,13 +34,13 @@ */ #include -int fprop_global_init(struct fprop_global *p) +int fprop_global_init(struct fprop_global *p, gfp_t gfp) { int err; p->period = 0; /* Use 1 to avoid dealing with periods with 0 events... 
*/ - err = percpu_counter_init(&p->events, 1, GFP_KERNEL); + err = percpu_counter_init(&p->events, 1, gfp); if (err) return err; seqcount_init(&p->sequence); @@ -168,11 +168,11 @@ void fprop_fraction_single(struct fprop_global *p, */ #define PROP_BATCH (8*(1+ilog2(nr_cpu_ids))) -int fprop_local_init_percpu(struct fprop_local_percpu *pl) +int fprop_local_init_percpu(struct fprop_local_percpu *pl, gfp_t gfp) { int err; - err = percpu_counter_init(&pl->events, 0, GFP_KERNEL); + err = percpu_counter_init(&pl->events, 0, gfp); if (err) return err; pl->period = 0; diff --git a/lib/proportions.c b/lib/proportions.c index ca95f8d54384..6f724298f67a 100644 --- a/lib/proportions.c +++ b/lib/proportions.c @@ -73,7 +73,7 @@ #include #include -int prop_descriptor_init(struct prop_descriptor *pd, int shift) +int prop_descriptor_init(struct prop_descriptor *pd, int shift, gfp_t gfp) { int err; @@ -83,11 +83,11 @@ int prop_descriptor_init(struct prop_descriptor *pd, int shift) pd->index = 0; pd->pg[0].shift = shift; mutex_init(&pd->mutex); - err = percpu_counter_init(&pd->pg[0].events, 0, GFP_KERNEL); + err = percpu_counter_init(&pd->pg[0].events, 0, gfp); if (err) goto out; - err = percpu_counter_init(&pd->pg[1].events, 0, GFP_KERNEL); + err = percpu_counter_init(&pd->pg[1].events, 0, gfp); if (err) percpu_counter_destroy(&pd->pg[0].events); @@ -188,12 +188,12 @@ prop_adjust_shift(int *pl_shift, unsigned long *pl_period, int new_shift) #define PROP_BATCH (8*(1+ilog2(nr_cpu_ids))) -int prop_local_init_percpu(struct prop_local_percpu *pl) +int prop_local_init_percpu(struct prop_local_percpu *pl, gfp_t gfp) { raw_spin_lock_init(&pl->lock); pl->shift = 0; pl->period = 0; - return percpu_counter_init(&pl->events, 0, GFP_KERNEL); + return percpu_counter_init(&pl->events, 0, gfp); } void prop_local_destroy_percpu(struct prop_local_percpu *pl) diff --git a/mm/backing-dev.c b/mm/backing-dev.c index f19a818be2d3..64ec49d1772b 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -470,7 +470,7 @@ int bdi_init(struct backing_dev_info *bdi) bdi->write_bandwidth = INIT_BW; bdi->avg_write_bandwidth = INIT_BW; - err = fprop_local_init_percpu(&bdi->completions); + err = fprop_local_init_percpu(&bdi->completions, GFP_KERNEL); if (err) { err: diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 91d73ef1744d..508599403721 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1777,7 +1777,7 @@ void __init page_writeback_init(void) writeback_set_ratelimit(); register_cpu_notifier(&ratelimit_nb); - fprop_global_init(&writeout_completions); + fprop_global_init(&writeout_completions, GFP_KERNEL); } /** -- cgit v1.2.3 From a34375ef9e65340a138fc0be287de5c940d260fc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 09:51:30 +0900 Subject: percpu-refcount: add @gfp to percpu_ref_init() Percpu allocator now supports allocation mask. Add @gfp to percpu_ref_init() so that !GFP_KERNEL allocation masks can be used with percpu_refs too. This patch doesn't make any functional difference. v2: blk-mq conversion was missing. Updated. Signed-off-by: Tejun Heo Cc: Kent Overstreet Cc: Benjamin LaHaise Cc: Li Zefan Cc: Nicholas A. 
Bellinger Cc: Jens Axboe --- block/blk-mq.c | 3 ++- drivers/target/target_core_tpg.c | 3 ++- fs/aio.c | 4 ++-- include/linux/percpu-refcount.h | 3 ++- kernel/cgroup.c | 6 +++--- lib/percpu-refcount.c | 6 ++++-- 6 files changed, 15 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/block/blk-mq.c b/block/blk-mq.c index 5189cb1e478a..702df07b980d 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -1776,7 +1776,8 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) if (!q) goto err_hctxs; - if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release)) + if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release, + GFP_KERNEL)) goto err_map; setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q); diff --git a/drivers/target/target_core_tpg.c b/drivers/target/target_core_tpg.c index fddfae61222f..4ab6da338585 100644 --- a/drivers/target/target_core_tpg.c +++ b/drivers/target/target_core_tpg.c @@ -819,7 +819,8 @@ int core_tpg_add_lun( { int ret; - ret = percpu_ref_init(&lun->lun_ref, core_tpg_lun_ref_release); + ret = percpu_ref_init(&lun->lun_ref, core_tpg_lun_ref_release, + GFP_KERNEL); if (ret < 0) return ret; diff --git a/fs/aio.c b/fs/aio.c index bd7ec2cc2674..93fbcc0f5696 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -666,10 +666,10 @@ static struct kioctx *ioctx_alloc(unsigned nr_events) INIT_LIST_HEAD(&ctx->active_reqs); - if (percpu_ref_init(&ctx->users, free_ioctx_users)) + if (percpu_ref_init(&ctx->users, free_ioctx_users, GFP_KERNEL)) goto err; - if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs)) + if (percpu_ref_init(&ctx->reqs, free_ioctx_reqs, GFP_KERNEL)) goto err; ctx->cpu = alloc_percpu(struct kioctx_cpu); diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 3dfbf237cd8f..ee8325122dbd 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -49,6 +49,7 @@ #include #include #include +#include struct percpu_ref; typedef void (percpu_ref_func_t)(struct percpu_ref *); @@ -66,7 +67,7 @@ struct percpu_ref { }; int __must_check percpu_ref_init(struct percpu_ref *ref, - percpu_ref_func_t *release); + percpu_ref_func_t *release, gfp_t gfp); void percpu_ref_reinit(struct percpu_ref *ref); void percpu_ref_exit(struct percpu_ref *ref); void percpu_ref_kill_and_confirm(struct percpu_ref *ref, diff --git a/kernel/cgroup.c b/kernel/cgroup.c index 7dc8788cfd52..589b4d89a0a5 100644 --- a/kernel/cgroup.c +++ b/kernel/cgroup.c @@ -1628,7 +1628,7 @@ static int cgroup_setup_root(struct cgroup_root *root, unsigned int ss_mask) goto out; root_cgrp->id = ret; - ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release); + ret = percpu_ref_init(&root_cgrp->self.refcnt, css_release, GFP_KERNEL); if (ret) goto out; @@ -4487,7 +4487,7 @@ static int create_css(struct cgroup *cgrp, struct cgroup_subsys *ss, init_and_link_css(css, ss, cgrp); - err = percpu_ref_init(&css->refcnt, css_release); + err = percpu_ref_init(&css->refcnt, css_release, GFP_KERNEL); if (err) goto err_free_css; @@ -4555,7 +4555,7 @@ static int cgroup_mkdir(struct kernfs_node *parent_kn, const char *name, goto out_unlock; } - ret = percpu_ref_init(&cgrp->self.refcnt, css_release); + ret = percpu_ref_init(&cgrp->self.refcnt, css_release, GFP_KERNEL); if (ret) goto out_free_cgrp; diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index fe5a3342e960..ff9903264a91 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -40,6 +40,7 @@ static unsigned __percpu *pcpu_count_ptr(struct percpu_ref *ref) * 
percpu_ref_init - initialize a percpu refcount * @ref: percpu_ref to initialize * @release: function which will be called when refcount hits 0 + * @gfp: allocation mask to use * * Initializes the refcount in single atomic counter mode with a refcount of 1; * analagous to atomic_set(ref, 1). @@ -47,11 +48,12 @@ static unsigned __percpu *pcpu_count_ptr(struct percpu_ref *ref) * Note that @release must not sleep - it may potentially be called from RCU * callback context by percpu_ref_kill(). */ -int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release) +int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release, + gfp_t gfp) { atomic_set(&ref->count, 1 + PCPU_COUNT_BIAS); - ref->pcpu_count_ptr = (unsigned long)alloc_percpu(unsigned); + ref->pcpu_count_ptr = (unsigned long)alloc_percpu_gfp(unsigned, gfp); if (!ref->pcpu_count_ptr) return -ENOMEM; -- cgit v1.2.3 From e78c3496790ee8a36522a838b59b388e8a709e65 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Sat, 16 Aug 2014 13:40:10 -0400 Subject: time, signal: Protect resource use statistics with seqlock Both times() and clock_gettime(CLOCK_PROCESS_CPUTIME_ID) have scalability issues on large systems, due to both functions being serialized with a lock. The lock protects against reporting a wrong value, due to a thread in the task group exiting, its statistics reporting up to the signal struct, and that exited task's statistics being counted twice (or not at all). Protecting that with a lock results in times() and clock_gettime() being completely serialized on large systems. This can be fixed by using a seqlock around the events that gather and propagate statistics. As an additional benefit, the protection code can be moved into thread_group_cputime(), slightly simplifying the calling functions. In the case of posix_cpu_clock_get_task() things can be simplified a lot, because the calling function already ensures that the task sticks around, and the rest is now taken care of in thread_group_cputime(). This way the statistics reporting code can run lockless. Signed-off-by: Rik van Riel Signed-off-by: Peter Zijlstra (Intel) Cc: Alex Thorlton Cc: Andrew Morton Cc: Daeseok Youn Cc: David Rientjes Cc: Dongsheng Yang Cc: Geert Uytterhoeven Cc: Guillaume Morin Cc: Ionut Alexa Cc: Kees Cook Cc: Linus Torvalds Cc: Li Zefan Cc: Michal Hocko Cc: Michal Schmidt Cc: Oleg Nesterov Cc: Vladimir Davydov Cc: umgwanakikbuti@gmail.com Cc: fweisbec@gmail.com Cc: srao@redhat.com Cc: lwoodman@redhat.com Cc: atheurer@redhat.com Link: http://lkml.kernel.org/r/20140816134010.26a9b572@annuminas.surriel.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 1 + kernel/exit.c | 4 ++++ kernel/fork.c | 1 + kernel/sched/cputime.c | 33 ++++++++++++++++++++------------- kernel/sys.c | 2 -- kernel/time/posix-cpu-timers.c | 14 -------------- 6 files changed, 26 insertions(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 5c2c885ee52b..dd9eb4807389 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -645,6 +645,7 @@ struct signal_struct { * Live threads maintain their own counters and add to these * in __exit_signal, except for the group leader. */ + seqlock_t stats_lock; cputime_t utime, stime, cutime, cstime; cputime_t gtime; cputime_t cgtime; diff --git a/kernel/exit.c b/kernel/exit.c index b93d46dab6fc..fa09b86609db 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -127,6 +127,7 @@ static void __exit_signal(struct task_struct *tsk) * the signal_struct. 
*/ task_cputime(tsk, &utime, &stime); + write_seqlock(&sig->stats_lock); sig->utime += utime; sig->stime += stime; sig->gtime += task_gtime(tsk); @@ -140,6 +141,7 @@ static void __exit_signal(struct task_struct *tsk) sig->sum_sched_runtime += tsk->se.sum_exec_runtime; sig->nr_threads--; __unhash_process(tsk, group_dead); + write_sequnlock(&sig->stats_lock); /* * Do this under ->siglock, we can race with another thread @@ -1042,6 +1044,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) spin_lock_irq(&p->real_parent->sighand->siglock); psig = p->real_parent->signal; sig = p->signal; + write_seqlock(&psig->stats_lock); psig->cutime += tgutime + sig->cutime; psig->cstime += tgstime + sig->cstime; psig->cgtime += task_gtime(p) + sig->gtime + sig->cgtime; @@ -1064,6 +1067,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) psig->cmaxrss = maxrss; task_io_accounting_add(&psig->ioac, &p->ioac); task_io_accounting_add(&psig->ioac, &sig->ioac); + write_sequnlock(&psig->stats_lock); spin_unlock_irq(&p->real_parent->sighand->siglock); } diff --git a/kernel/fork.c b/kernel/fork.c index 0cf9cdb6e491..9387ae8ab048 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1068,6 +1068,7 @@ static int copy_signal(unsigned long clone_flags, struct task_struct *tsk) sig->curr_target = tsk; init_sigpending(&sig->shared_pending); INIT_LIST_HEAD(&sig->posix_timers); + seqlock_init(&sig->stats_lock); hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL); sig->real_timer.function = it_real_fn; diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 3e52836359ba..49b7cfe98f7a 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -288,18 +288,28 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) struct signal_struct *sig = tsk->signal; cputime_t utime, stime; struct task_struct *t; - - times->utime = sig->utime; - times->stime = sig->stime; - times->sum_exec_runtime = sig->sum_sched_runtime; + unsigned int seq, nextseq; rcu_read_lock(); - for_each_thread(tsk, t) { - task_cputime(t, &utime, &stime); - times->utime += utime; - times->stime += stime; - times->sum_exec_runtime += task_sched_runtime(t); - } + /* Attempt a lockless read on the first round. */ + nextseq = 0; + do { + seq = nextseq; + read_seqbegin_or_lock(&sig->stats_lock, &seq); + times->utime = sig->utime; + times->stime = sig->stime; + times->sum_exec_runtime = sig->sum_sched_runtime; + + for_each_thread(tsk, t) { + task_cputime(t, &utime, &stime); + times->utime += utime; + times->stime += stime; + times->sum_exec_runtime += task_sched_runtime(t); + } + /* If lockless access failed, take the lock. */ + nextseq = 1; + } while (need_seqretry(&sig->stats_lock, seq)); + done_seqretry(&sig->stats_lock, seq); rcu_read_unlock(); } @@ -611,9 +621,6 @@ void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) cputime_adjust(&cputime, &p->prev_cputime, ut, st); } -/* - * Must be called with siglock held. 
- */ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) { struct task_cputime cputime; diff --git a/kernel/sys.c b/kernel/sys.c index ce8129192a26..b6636643cbd1 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -862,11 +862,9 @@ void do_sys_times(struct tms *tms) { cputime_t tgutime, tgstime, cutime, cstime; - spin_lock_irq(&current->sighand->siglock); thread_group_cputime_adjusted(current, &tgutime, &tgstime); cutime = current->signal->cutime; cstime = current->signal->cstime; - spin_unlock_irq(&current->sighand->siglock); tms->tms_utime = cputime_to_clock_t(tgutime); tms->tms_stime = cputime_to_clock_t(tgstime); tms->tms_cutime = cputime_to_clock_t(cutime); diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index 3b8946416a5f..492b986195d5 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -272,22 +272,8 @@ static int posix_cpu_clock_get_task(struct task_struct *tsk, if (same_thread_group(tsk, current)) err = cpu_clock_sample(which_clock, tsk, &rtn); } else { - unsigned long flags; - struct sighand_struct *sighand; - - /* - * while_each_thread() is not yet entirely RCU safe, - * keep locking the group while sampling process - * clock for now. - */ - sighand = lock_task_sighand(tsk, &flags); - if (!sighand) - return err; - if (tsk == current || thread_group_leader(tsk)) err = cpu_clock_sample_group(which_clock, tsk, &rtn); - - unlock_task_sighand(tsk, &flags); } if (!err) -- cgit v1.2.3 From 0b750b3baa2d64f1b77aecc10f20deeb28efe60d Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 5 Sep 2014 18:08:47 +0200 Subject: HID: usbhid: add always-poll quirk Add a quirk to make sure that a device is always polled for input events even if it hasn't been opened. This is needed for devices that disconnect from the bus unless the interrupt endpoint has been polled at least once, or when an input event goes unanswered (e.g. after having shut down X).
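For context, a minimal sketch (not taken from this patch; the demo_* probe function is hypothetical) of how a HID driver for such a device could opt in: setting the quirk bit before hid_hw_start() means usbhid_start() will submit the interrupt URB and keep polling even while the device node is closed.

#include <linux/hid.h>

static int demo_hid_probe(struct hid_device *hdev,
			  const struct hid_device_id *id)
{
	int ret;

	/* Keep the interrupt endpoint polled even with no open handle. */
	hdev->quirks |= HID_QUIRK_ALWAYS_POLL;

	ret = hid_parse(hdev);
	if (ret)
		return ret;

	return hid_hw_start(hdev, HID_CONNECT_DEFAULT);
}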
Signed-off-by: Johan Hovold Signed-off-by: Jiri Kosina --- drivers/hid/usbhid/hid-core.c | 26 +++++++++++++++++++++++--- include/linux/hid.h | 1 + 2 files changed, 24 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/hid/usbhid/hid-core.c b/drivers/hid/usbhid/hid-core.c index 79cf503e37bf..ddd547ad6d7e 100644 --- a/drivers/hid/usbhid/hid-core.c +++ b/drivers/hid/usbhid/hid-core.c @@ -82,7 +82,7 @@ static int hid_start_in(struct hid_device *hid) struct usbhid_device *usbhid = hid->driver_data; spin_lock_irqsave(&usbhid->lock, flags); - if (hid->open > 0 && + if ((hid->open > 0 || hid->quirks & HID_QUIRK_ALWAYS_POLL) && !test_bit(HID_DISCONNECTED, &usbhid->iofl) && !test_bit(HID_SUSPENDED, &usbhid->iofl) && !test_and_set_bit(HID_IN_RUNNING, &usbhid->iofl)) { @@ -292,6 +292,8 @@ static void hid_irq_in(struct urb *urb) case 0: /* success */ usbhid_mark_busy(usbhid); usbhid->retry_delay = 0; + if ((hid->quirks & HID_QUIRK_ALWAYS_POLL) && !hid->open) + break; hid_input_report(urb->context, HID_INPUT_REPORT, urb->transfer_buffer, urb->actual_length, 1); @@ -735,8 +737,10 @@ void usbhid_close(struct hid_device *hid) if (!--hid->open) { spin_unlock_irq(&usbhid->lock); hid_cancel_delayed_stuff(usbhid); - usb_kill_urb(usbhid->urbin); - usbhid->intf->needs_remote_wakeup = 0; + if (!(hid->quirks & HID_QUIRK_ALWAYS_POLL)) { + usb_kill_urb(usbhid->urbin); + usbhid->intf->needs_remote_wakeup = 0; + } } else { spin_unlock_irq(&usbhid->lock); } @@ -1134,6 +1138,19 @@ static int usbhid_start(struct hid_device *hid) set_bit(HID_STARTED, &usbhid->iofl); + if (hid->quirks & HID_QUIRK_ALWAYS_POLL) { + ret = usb_autopm_get_interface(usbhid->intf); + if (ret) + goto fail; + usbhid->intf->needs_remote_wakeup = 1; + ret = hid_start_in(hid); + if (ret) { + dev_err(&hid->dev, + "failed to start in urb: %d\n", ret); + } + usb_autopm_put_interface(usbhid->intf); + } + /* Some keyboards don't work until their LEDs have been set. * Since BIOSes do set the LEDs, it must be safe for any device * that supports the keyboard boot protocol. @@ -1166,6 +1183,9 @@ static void usbhid_stop(struct hid_device *hid) if (WARN_ON(!usbhid)) return; + if (hid->quirks & HID_QUIRK_ALWAYS_POLL) + usbhid->intf->needs_remote_wakeup = 0; + clear_bit(HID_STARTED, &usbhid->iofl); spin_lock_irq(&usbhid->lock); /* Sync with error and led handlers */ set_bit(HID_DISCONNECTED, &usbhid->iofl); diff --git a/include/linux/hid.h b/include/linux/hid.h index f53c4a9cca1d..26ee25fced27 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -287,6 +287,7 @@ struct hid_item { #define HID_QUIRK_HIDINPUT_FORCE 0x00000080 #define HID_QUIRK_NO_EMPTY_INPUT 0x00000100 #define HID_QUIRK_NO_INIT_INPUT_REPORTS 0x00000200 +#define HID_QUIRK_ALWAYS_POLL 0x00000400 #define HID_QUIRK_SKIP_OUTPUT_REPORTS 0x00010000 #define HID_QUIRK_SKIP_OUTPUT_REPORT_ID 0x00020000 #define HID_QUIRK_NO_OUTPUT_REPORTS_ON_INTR_EP 0x00040000 -- cgit v1.2.3 From c8d6591752e96c550cb98b781326d72d8eedcc79 Mon Sep 17 00:00:00 2001 From: "Steinar H. Gunderson" Date: Wed, 3 Sep 2014 06:48:37 -0700 Subject: mac80211: support DTPC IE (from Cisco Client eXtensions) Linux already supports 802.11h, where the access point can tell the client to reduce its transmission power. However, 802.11h is only defined for 5 GHz, where the need for this is much smaller than on 2.4 GHz. Cisco has their own solution, called DTPC (Dynamic Transmit Power Control). Cisco APs on a controller sometimes but not always send 802.11h; they always send DTPC, even on 2.4 GHz. 
This patch adds support for parsing and honoring the DTPC IE in addition to the 802.11h element (they do not always contain the same limits, so both must be honored); the format is not documented, but very simple. Tested (on top of wireless.git and on 3.16.1) against a Cisco Aironet 1142 joined to a Cisco 2504 WLC, by setting various transmit power levels for the given access points and observing the results. The Wireshark 802.11 dissector agrees with the interpretation of the element, except for negative numbers, which seem to never happen anyway. Signed-off-by: Steinar H. Gunderson Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 3 ++- net/mac80211/ieee80211_i.h | 1 + net/mac80211/mlme.c | 60 +++++++++++++++++++++++++++++++++++++--------- net/mac80211/util.c | 25 +++++++++++++++++++ 4 files changed, 77 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 77d4f26e0235..61a09f0644aa 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1824,7 +1824,8 @@ enum ieee80211_eid { WLAN_EID_DMG_TSPEC = 146, WLAN_EID_DMG_AT = 147, WLAN_EID_DMG_CAP = 148, - /* 149-150 reserved for Cisco */ + /* 149 reserved for Cisco */ + WLAN_EID_CISCO_VENDOR_SPECIFIC = 150, WLAN_EID_DMG_OPERATION = 151, WLAN_EID_DMG_BSS_PARAM_CHANGE = 152, WLAN_EID_DMG_BEAM_REFINEMENT = 153, diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 6b7bdd8fae29..c2aaec4dfcf0 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1369,6 +1369,7 @@ struct ieee802_11_elems { const struct ieee80211_wide_bw_chansw_ie *wide_bw_chansw_ie; const u8 *country_elem; const u8 *pwr_constr_elem; + const u8 *cisco_dtpc_elem; const struct ieee80211_timeout_interval_ie *timeout_int; const u8 *opmode_notif; const struct ieee80211_sec_chan_offs_ie *sec_chan_offs; diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 0b812a88f95f..a7b92f5f7161 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1230,14 +1230,30 @@ ieee80211_find_80211h_pwr_constr(struct ieee80211_sub_if_data *sdata, return have_chan_pwr; } +static void ieee80211_find_cisco_dtpc(struct ieee80211_sub_if_data *sdata, + struct ieee80211_channel *channel, + const u8 *cisco_dtpc_ie, + int *pwr_level) +{ + /* From practical testing, the first data byte of the DTPC element + * seems to contain the requested dBm level, and the CLI on Cisco + * APs clearly state the range is -127 to 127 dBm, which indicates + * a signed byte, although it seemingly never actually goes negative. + * The other byte seems to always be zero. 
+ */ + *pwr_level = (__s8)cisco_dtpc_ie[4]; +} + static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, struct ieee80211_channel *channel, struct ieee80211_mgmt *mgmt, const u8 *country_ie, u8 country_ie_len, - const u8 *pwr_constr_ie) + const u8 *pwr_constr_ie, + const u8 *cisco_dtpc_ie) { - bool has_80211h_pwr = false; - int chan_pwr, pwr_reduction_80211h; + bool has_80211h_pwr = false, has_cisco_pwr = false; + int chan_pwr = 0, pwr_reduction_80211h = 0; + int pwr_level_cisco, pwr_level_80211h; int new_ap_level; if (country_ie && pwr_constr_ie && @@ -1246,16 +1262,35 @@ static u32 ieee80211_handle_pwr_constr(struct ieee80211_sub_if_data *sdata, has_80211h_pwr = ieee80211_find_80211h_pwr_constr( sdata, channel, country_ie, country_ie_len, pwr_constr_ie, &chan_pwr, &pwr_reduction_80211h); - new_ap_level = max_t(int, 0, chan_pwr - pwr_reduction_80211h); + pwr_level_80211h = + max_t(int, 0, chan_pwr - pwr_reduction_80211h); + } + + if (cisco_dtpc_ie) { + ieee80211_find_cisco_dtpc( + sdata, channel, cisco_dtpc_ie, &pwr_level_cisco); + has_cisco_pwr = true; } - if (!has_80211h_pwr) + if (!has_80211h_pwr && !has_cisco_pwr) return 0; - sdata_info(sdata, - "Limiting TX power to %d (%d - %d) dBm as advertised by %pM\n", - new_ap_level, chan_pwr, pwr_reduction_80211h, - sdata->u.mgd.bssid); + /* If we have both 802.11h and Cisco DTPC, apply both limits + * by picking the smallest of the two power levels advertised. + */ + if (has_80211h_pwr && + (!has_cisco_pwr || pwr_level_80211h <= pwr_level_cisco)) { + sdata_info(sdata, + "Limiting TX power to %d (%d - %d) dBm as advertised by %pM\n", + pwr_level_80211h, chan_pwr, pwr_reduction_80211h, + sdata->u.mgd.bssid); + new_ap_level = pwr_level_80211h; + } else { /* has_cisco_pwr is always true here. */ + sdata_info(sdata, + "Limiting TX power to %d dBm as advertised by %pM\n", + pwr_level_cisco, sdata->u.mgd.bssid); + new_ap_level = pwr_level_cisco; + } if (sdata->ap_power_level == new_ap_level) return 0; @@ -2923,7 +2958,9 @@ static void ieee80211_rx_mgmt_probe_resp(struct ieee80211_sub_if_data *sdata, /* * This is the canonical list of information elements we care about, * the filter code also gives us all changes to the Microsoft OUI - * (00:50:F2) vendor IE which is used for WMM which we need to track. + * (00:50:F2) vendor IE which is used for WMM which we need to track, + * as well as the DTPC IE (part of the Cisco OUI) used for signaling + * changes to requested client power. * * We implement beacon filtering in software since that means we can * avoid processing the frame here and in cfg80211, and userspace @@ -3232,7 +3269,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata, changed |= ieee80211_handle_pwr_constr(sdata, chan, mgmt, elems.country_elem, elems.country_elem_len, - elems.pwr_constr_elem); + elems.pwr_constr_elem, + elems.cisco_dtpc_elem); ieee80211_bss_info_change_notify(sdata, changed); } diff --git a/net/mac80211/util.c b/net/mac80211/util.c index b444f27a788e..3c61060a4d2b 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -1015,6 +1015,31 @@ u32 ieee802_11_parse_elems_crc(const u8 *start, size_t len, bool action, } elems->pwr_constr_elem = pos; break; + case WLAN_EID_CISCO_VENDOR_SPECIFIC: + /* Lots of different options exist, but we only care + * about the Dynamic Transmit Power Control element. + * First check for the Cisco OUI, then for the DTPC + * tag (0x00). 
+ */ + if (elen < 4) { + elem_parse_failed = true; + break; + } + + if (pos[0] != 0x00 || pos[1] != 0x40 || + pos[2] != 0x96 || pos[3] != 0x00) + break; + + if (elen != 6) { + elem_parse_failed = true; + break; + } + + if (calc_crc) + crc = crc32_be(crc, pos - 2, elen + 2); + + elems->cisco_dtpc_elem = pos; + break; case WLAN_EID_TIMEOUT_INTERVAL: if (elen >= sizeof(struct ieee80211_timeout_interval_ie)) elems->timeout_int = (void *)pos; -- cgit v1.2.3 From 4930d247af29f849cd1bddd65be2400684dc886e Mon Sep 17 00:00:00 2001 From: Gaël PORTAY Date: Sat, 6 Sep 2014 19:52:35 +0200 Subject: ARM: at91/tclib: move initialization from alloc to probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move resource retrieval from atmel_tc_alloc to tc_probe to avoid reporting resource-related issues late, i.e. only when a TC block user requests a TC block. Moreover, resource retrieval is usually done in the probe function, so moving it adds some consistency with other drivers. Initialization is done once, i.e. not every time a TC block is requested. If it fails, the device is not appended to the list of TC blocks. Furthermore, the device id is retrieved at probe as well, avoiding parsing the DT every time the user requests a TC block. Signed-off-by: Gaël PORTAY Acked-by: Thierry Reding Acked-by: Boris Brezillon Signed-off-by: Nicolas Ferre --- drivers/clocksource/tcb_clksrc.c | 2 +- drivers/misc/atmel_tclib.c | 71 +++++++++++++--------------------------- drivers/pwm/pwm-atmel-tcb.c | 2 +- include/linux/atmel_tc.h | 8 +++-- 4 files changed, 29 insertions(+), 54 deletions(-) (limited to 'include/linux') diff --git a/drivers/clocksource/tcb_clksrc.c b/drivers/clocksource/tcb_clksrc.c index a8d7ea14f183..f922e81d531b 100644 --- a/drivers/clocksource/tcb_clksrc.c +++ b/drivers/clocksource/tcb_clksrc.c @@ -279,7 +279,7 @@ static int __init tcb_clksrc_init(void) int i; int ret; - tc = atmel_tc_alloc(CONFIG_ATMEL_TCB_CLKSRC_BLOCK, clksrc.name); + tc = atmel_tc_alloc(CONFIG_ATMEL_TCB_CLKSRC_BLOCK); if (!tc) { pr_debug("can't alloc TC for clocksource\n"); return -ENODEV; diff --git a/drivers/misc/atmel_tclib.c b/drivers/misc/atmel_tclib.c index b514a2d4485b..d505d1e0857b 100644 --- a/drivers/misc/atmel_tclib.c +++ b/drivers/misc/atmel_tclib.c @@ -35,60 +35,31 @@ static LIST_HEAD(tc_list); /** * atmel_tc_alloc - allocate a specified TC block * @block: which block to allocate - * @name: name to be associated with the iomem resource * * Caller allocates a block. If it is available, a pointer to a * pre-initialized struct atmel_tc is returned. The caller can access * the registers directly through the "regs" field.
*/ -struct atmel_tc *atmel_tc_alloc(unsigned block, const char *name) +struct atmel_tc *atmel_tc_alloc(unsigned block) { struct atmel_tc *tc; struct platform_device *pdev = NULL; - struct resource *r; - size_t size; spin_lock(&tc_list_lock); list_for_each_entry(tc, &tc_list, node) { - if (tc->pdev->dev.of_node) { - if (of_alias_get_id(tc->pdev->dev.of_node, "tcb") - == block) { - pdev = tc->pdev; - break; - } - } else if (tc->pdev->id == block) { + if (tc->allocated) + continue; + + if ((tc->pdev->dev.of_node && tc->id == block) || + (tc->pdev->id == block)) { pdev = tc->pdev; + tc->allocated = true; break; } } - - if (!pdev || tc->iomem) - goto fail; - - r = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!r) - goto fail; - - size = resource_size(r); - r = request_mem_region(r->start, size, name); - if (!r) - goto fail; - - tc->regs = ioremap(r->start, size); - if (!tc->regs) - goto fail_ioremap; - - tc->iomem = r; - -out: spin_unlock(&tc_list_lock); - return tc; -fail_ioremap: - release_mem_region(r->start, size); -fail: - tc = NULL; - goto out; + return pdev ? tc : NULL; } EXPORT_SYMBOL_GPL(atmel_tc_alloc); @@ -96,19 +67,14 @@ EXPORT_SYMBOL_GPL(atmel_tc_alloc); * atmel_tc_free - release a specified TC block * @tc: Timer/counter block that was returned by atmel_tc_alloc() * - * This reverses the effect of atmel_tc_alloc(), unmapping the I/O - * registers, invalidating the resource returned by that routine and - * making the TC available to other drivers. + * This reverses the effect of atmel_tc_alloc(), invalidating the resource + * returned by that routine and making the TC available to other drivers. */ void atmel_tc_free(struct atmel_tc *tc) { spin_lock(&tc_list_lock); - if (tc->regs) { - iounmap(tc->regs); - release_mem_region(tc->iomem->start, resource_size(tc->iomem)); - tc->regs = NULL; - tc->iomem = NULL; - } + if (tc->allocated) + tc->allocated = false; spin_unlock(&tc_list_lock); } EXPORT_SYMBOL_GPL(atmel_tc_free); @@ -142,9 +108,7 @@ static int __init tc_probe(struct platform_device *pdev) struct atmel_tc *tc; struct clk *clk; int irq; - - if (!platform_get_resource(pdev, IORESOURCE_MEM, 0)) - return -EINVAL; + struct resource *r; irq = platform_get_irq(pdev, 0); if (irq < 0) @@ -160,12 +124,21 @@ static int __init tc_probe(struct platform_device *pdev) if (IS_ERR(clk)) return PTR_ERR(clk); + r = platform_get_resource(pdev, IORESOURCE_MEM, 0); + tc->regs = devm_ioremap_resource(&pdev->dev, r); + if (IS_ERR(tc->regs)) + return PTR_ERR(tc->regs); + /* Now take SoC information if available */ if (pdev->dev.of_node) { const struct of_device_id *match; match = of_match_node(atmel_tcb_dt_ids, pdev->dev.of_node); if (match) tc->tcb_config = match->data; + + tc->id = of_alias_get_id(tc->pdev->dev.of_node, "tcb"); + } else { + tc->id = pdev->id; } tc->clk[0] = clk; diff --git a/drivers/pwm/pwm-atmel-tcb.c b/drivers/pwm/pwm-atmel-tcb.c index f3dcd02390f1..d56e5b717431 100644 --- a/drivers/pwm/pwm-atmel-tcb.c +++ b/drivers/pwm/pwm-atmel-tcb.c @@ -379,7 +379,7 @@ static int atmel_tcb_pwm_probe(struct platform_device *pdev) return err; } - tc = atmel_tc_alloc(tcblock, "tcb-pwm"); + tc = atmel_tc_alloc(tcblock); if (tc == NULL) { dev_err(&pdev->dev, "failed to allocate Timer Counter Block\n"); return -ENOMEM; diff --git a/include/linux/atmel_tc.h b/include/linux/atmel_tc.h index 89a931babecf..d8aa88461c64 100644 --- a/include/linux/atmel_tc.h +++ b/include/linux/atmel_tc.h @@ -44,12 +44,13 @@ struct atmel_tcb_config { /** * struct atmel_tc - information about a Timer/Counter Block * 
@pdev: physical device - * @iomem: resource associated with the I/O register * @regs: mapping through which the I/O registers can be accessed + * @id: block id * @tcb_config: configuration data from SoC * @irq: irq for each of the three channels * @clk: internal clock source for each of the three channels * @node: list node, for tclib internal use + * @allocated: if already used, for tclib internal use * * On some platforms, each TC channel has its own clocks and IRQs, * while on others, all TC channels share the same clock and IRQ. @@ -61,15 +62,16 @@ struct atmel_tcb_config { */ struct atmel_tc { struct platform_device *pdev; - struct resource *iomem; void __iomem *regs; + int id; const struct atmel_tcb_config *tcb_config; int irq[3]; struct clk *clk[3]; struct list_head node; + bool allocated; }; -extern struct atmel_tc *atmel_tc_alloc(unsigned block, const char *name); +extern struct atmel_tc *atmel_tc_alloc(unsigned block); extern void atmel_tc_free(struct atmel_tc *tc); /* platform-specific ATMEL_TC_TIMER_CLOCKx divisors (0 means 32KiHz) */ -- cgit v1.2.3 From 84f462371cc07272a17e2ae96c3540f795db273a Mon Sep 17 00:00:00 2001 From: Gaël PORTAY Date: Sat, 6 Sep 2014 19:52:36 +0200 Subject: ARM: at91/tclib: mask interruptions at shutdown and probe MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Properly shut down the timer counter block by masking its interrupts. Otherwise, a crash may happen when kexec-ing a new kernel (see the backtrace below): an interrupt may fire before the handler is set, leading to a kernel NULL pointer dereference. Furthermore, make sure the interrupts are masked when the driver is initialized. This prevents a freshly kexec-ed kernel from crashing when launched from a kernel which did not properly mask interrupts at shutdown. The backtrace below happened after kexec-ing a new kernel from a kernel that did not shut down properly, leaving interrupts unmasked.
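For reference, the core of the fix, applied at both probe and shutdown time in the diff below, is simply to write the "all interrupts" mask to each channel's Interrupt Disable Register. A minimal sketch, assuming a struct atmel_tc *tc as used in the driver, the existing ATMEL_TC_REG()/IDR accessors, and the ATMEL_TC_ALL_IRQ mask introduced by this patch:

        int i;

        /* mask every interrupt source on all three TC channels */
        for (i = 0; i < 3; i++)
                writel(ATMEL_TC_ALL_IRQ, tc->regs + ATMEL_TC_REG(i, IDR));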
Unable to handle kernel NULL pointer dereference at virtual address 00000000 pgd = c0004000 [00000000] *pgd=00000000 Internal error: Oops: 80000005 [#1] ARM Modules linked in: CPU: 0 PID: 1 Comm: swapper Not tainted 3.16.0+ #144 task: c1828aa0 ti: c182a000 task.ti: c182a000 PC is at 0x0 LR is at ch2_irq+0x28/0x30 pc : [<00000000>] lr : [] psr: 000000d3 sp : c182bd38 ip : c182bd48 fp : c182bd44 r10: c0373390 r9 : c1825b00 r8 : 60000053 r7 : 00000000 r6 : 00000000 r5 : 00000013 r4 : c036e800 r3 : 00000000 r2 : 00002004 r1 : c036e760 r0 : c036e760 Flags: nzcv IRQs off FIQs off Mode SVC_32 ISA ARM Segment kernel Control: 0005317f Table: 20004000 DAC: 00000017 Process swapper (pid: 1, stack limit = 0xc182a1c0) Stack: (0xc182bd38 to 0xc182c000) bd20: c182bd7c c182bd48 bd40: c0045430 c01db8ec 00000000 c18c6f40 c182bd74 c1825b00 c035cec4 00000000 bd60: c182be2c 60000053 c1825b34 00000000 c182bd94 c182bd80 c0045570 c0045408 bd80: 00000000 c1825b00 c182bdac c182bd98 c0047f34 c0045550 00000013 c036619c bda0: c182bdc4 c182bdb0 c0044da4 c0047e98 0000007f 00000013 c182bde4 c182bdc8 bdc0: c0009e34 c0044d8c fefff000 c0046728 60000053 ffffffff c182bdf4 c182bde8 bde0: c00086a8 c0009ddc c182be74 c182bdf8 c000cb80 c0008674 00000000 00000013 be00: 00000000 00014200 c1825b00 c036e800 00000013 c035ed98 60000053 c1825b34 be20: 00000000 c182be74 c182be20 c182be40 c0047994 c0046728 60000053 ffffffff be40: 00000013 c036e800 c182be64 c1825b00 00000013 c036e800 c035ed98 c03874bc be60: 00000004 c036e700 c182be94 c182be78 c004689c c0046398 c036e760 c18c6080 be80: 00000000 c035ed10 c182bedc c182be98 c0348b08 c004684c 0000000c c034dac8 bea0: 004c4b3f c028c338 c036e760 00000013 c014ecc8 c18e67e0 c035b9c0 c0348884 bec0: c035b9c0 c182a020 00000000 00000000 c182bf54 c182bee0 c00089fc c0348894 bee0: c00da51c c1ffcc78 c182bf0c c182bef8 c002d100 c002d09c c1ffcc78 00000000 bf00: c182bf54 c182bf10 c002d308 c0336570 c182bf3c c0334e44 00000003 00000003 bf20: 00000030 c0334b44 c0044d74 00000003 00000003 c034dac8 c0350a94 c0373440 bf40: c0373440 00000030 c182bf94 c182bf58 c0336d24 c000890c 00000003 00000003 bf60: c0336560 c182bf64 c182bf64 6e616e0d 00000000 c0272fc8 00000000 00000000 bf80: 00000000 00000000 c182bfac c182bf98 c0272fd8 c0336bd8 c182a000 00000000 bfa0: 00000000 c182bfb0 c00095d0 c0272fd8 00000000 00000000 00000000 00000000 bfc0: 00000000 00000000 00000000 00000000 00000000 00000000 00000000 00000000 bfe0: 00000000 00000000 00000000 00000000 00000013 00000000 374d27cd 33cc33e4 Backtrace: [] (ch2_irq) from [] (handle_irq_event_percpu+0x38/0x148) [] (handle_irq_event_percpu) from [] (handle_irq_event+0x30/0x40) r10:00000000 r9:c1825b34 r8:60000053 r7:c182be2c r6:00000000 r5:c035cec4 r4:c1825b00 [] (handle_irq_event) from [] (handle_fasteoi_irq+0xac/0x11c) r4:c1825b00 r3:00000000 [] (handle_fasteoi_irq) from [] (generic_handle_irq+0x28/0x38) r5:c036619c r4:00000013 [] (generic_handle_irq) from [] (handle_IRQ+0x68/0x88) r4:00000013 r3:0000007f [] (handle_IRQ) from [] (at91_aic_handle_irq+0x44/0x4c) r6:ffffffff r5:60000053 r4:c0046728 r3:fefff000 [] (at91_aic_handle_irq) from [] (__irq_svc+0x40/0x4c) Exception stack(0xc182bdf8 to 0xc182be40) bde0: 00000000 00000013 be00: 00000000 00014200 c1825b00 c036e800 00000013 c035ed98 60000053 c1825b34 be20: 00000000 c182be74 c182be20 c182be40 c0047994 c0046728 60000053 ffffffff [] (__setup_irq) from [] (setup_irq+0x60/0x8c) r10:c036e700 r9:00000004 r8:c03874bc r7:c035ed98 r6:c036e800 r5:00000013 r4:c1825b00 [] (setup_irq) from [] (tcb_clksrc_init+0x284/0x31c) r6:c035ed10 r5:00000000 
r4:c18c6080 r3:c036e760 [] (tcb_clksrc_init) from [] (do_one_initcall+0x100/0x1b4) r10:00000000 r9:00000000 r8:c182a020 r7:c035b9c0 r6:c0348884 r5:c035b9c0 r4:c18e67e0 [] (do_one_initcall) from [] (kernel_init_freeable+0x15c/0x224) r9:00000030 r8:c0373440 r7:c0373440 r6:c0350a94 r5:c034dac8 r4:00000003 [] (kernel_init_freeable) from [] (kernel_init+0x10/0xec) r9:00000000 r8:00000000 r7:00000000 r6:00000000 r5:c0272fc8 r4:00000000 [] (kernel_init) from [] (ret_from_fork+0x14/0x24) r4:00000000 r3:c182a000 Code: bad PC value ---[ end trace 5b30f0017e282e47 ]--- Kernel panic - not syncing: Fatal exception in interrupt Signed-off-by: Gaël PORTAY Acked-by: Boris Brezillon Acked-by: Daniel Lezcano Signed-off-by: Nicolas Ferre --- drivers/misc/atmel_tclib.c | 16 ++++++++++++++++ include/linux/atmel_tc.h | 5 +++++ 2 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/drivers/misc/atmel_tclib.c b/drivers/misc/atmel_tclib.c index d505d1e0857b..0ca05c3ec8d6 100644 --- a/drivers/misc/atmel_tclib.c +++ b/drivers/misc/atmel_tclib.c @@ -109,6 +109,7 @@ static int __init tc_probe(struct platform_device *pdev) struct clk *clk; int irq; struct resource *r; + unsigned int i; irq = platform_get_irq(pdev, 0); if (irq < 0) @@ -157,18 +158,33 @@ static int __init tc_probe(struct platform_device *pdev) if (tc->irq[2] < 0) tc->irq[2] = irq; + for (i = 0; i < 3; i++) + writel(ATMEL_TC_ALL_IRQ, tc->regs + ATMEL_TC_REG(i, IDR)); + spin_lock(&tc_list_lock); list_add_tail(&tc->node, &tc_list); spin_unlock(&tc_list_lock); + platform_set_drvdata(pdev, tc); + return 0; } +static void tc_shutdown(struct platform_device *pdev) +{ + int i; + struct atmel_tc *tc = platform_get_drvdata(pdev); + + for (i = 0; i < 3; i++) + writel(ATMEL_TC_ALL_IRQ, tc->regs + ATMEL_TC_REG(i, IDR)); +} + static struct platform_driver tc_driver = { .driver = { .name = "atmel_tcb", .of_match_table = of_match_ptr(atmel_tcb_dt_ids), }, + .shutdown = tc_shutdown, }; static int __init tc_init(void) diff --git a/include/linux/atmel_tc.h b/include/linux/atmel_tc.h index d8aa88461c64..b87c1c7c242a 100644 --- a/include/linux/atmel_tc.h +++ b/include/linux/atmel_tc.h @@ -260,5 +260,10 @@ extern const u8 atmel_tc_divisors[5]; #define ATMEL_TC_LDRAS (1 << 5) /* RA loading */ #define ATMEL_TC_LDRBS (1 << 6) /* RB loading */ #define ATMEL_TC_ETRGS (1 << 7) /* external trigger */ +#define ATMEL_TC_ALL_IRQ (ATMEL_TC_COVFS | ATMEL_TC_LOVRS | \ + ATMEL_TC_CPAS | ATMEL_TC_CPBS | \ + ATMEL_TC_CPCS | ATMEL_TC_LDRAS | \ + ATMEL_TC_LDRBS | ATMEL_TC_ETRGS) \ + /* all IRQs */ #endif -- cgit v1.2.3 From ff9ea323816dc1c8ac7144afd4eab3ac97704430 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 08:03:56 +0900 Subject: block, bdi: an active gendisk always has a request_queue associated with it bdev_get_queue() returns the request_queue associated with the specified block_device. blk_get_backing_dev_info() makes use of bdev_get_queue() to determine the associated bdi given a block_device. All the callers of bdev_get_queue() including blk_get_backing_dev_info() assume that bdev_get_queue() may return NULL and implement NULL handling; however, bdev_get_queue() requires the passed in block_device is opened and attached to its gendisk. Because an active gendisk always has a valid request_queue associated with it, bdev_get_queue() can never return NULL and neither can blk_get_backing_dev_info(). Make it clear that neither of the two functions can return NULL and remove NULL handling from all the callers. 
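With this guarantee in place, a caller that holds an open block device can use the result unconditionally; a sketch of the resulting calling convention (the read-ahead assignment mirrors what blk-core already does and is only an example):

        struct backing_dev_info *bdi = blk_get_backing_dev_info(bdev);

        /* bdev is open, so bdi is guaranteed to be non-NULL here */
        bdi->ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;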
Signed-off-by: Tejun Heo Cc: Chris Mason Cc: Dave Chinner Signed-off-by: Jens Axboe --- block/blk-core.c | 10 +++------- block/compat_ioctl.c | 4 ---- block/ioctl.c | 4 ---- fs/block_dev.c | 2 -- fs/btrfs/disk-io.c | 2 +- fs/xfs/xfs_buf.c | 2 -- include/linux/blkdev.h | 2 +- 7 files changed, 5 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 93603e6ff479..817446175489 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -83,18 +83,14 @@ void blk_queue_congestion_threshold(struct request_queue *q) * @bdev: device * * Locates the passed device's request queue and returns the address of its - * backing_dev_info - * - * Will return NULL if the request queue cannot be located. + * backing_dev_info. This function can only be called if @bdev is opened + * and the return value is never NULL. */ struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev) { - struct backing_dev_info *ret = NULL; struct request_queue *q = bdev_get_queue(bdev); - if (q) - ret = &q->backing_dev_info; - return ret; + return &q->backing_dev_info; } EXPORT_SYMBOL(blk_get_backing_dev_info); diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index 18b282ce361e..f678c733df40 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -709,8 +709,6 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) if (!arg) return -EINVAL; bdi = blk_get_backing_dev_info(bdev); - if (bdi == NULL) - return -ENOTTY; return compat_put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512); case BLKROGET: /* compatible */ @@ -731,8 +729,6 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) if (!capable(CAP_SYS_ADMIN)) return -EACCES; bdi = blk_get_backing_dev_info(bdev); - if (bdi == NULL) - return -ENOTTY; bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE; return 0; case BLKGETSIZE: diff --git a/block/ioctl.c b/block/ioctl.c index d6cda8147c91..6c7bf903742f 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -356,8 +356,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, if (!arg) return -EINVAL; bdi = blk_get_backing_dev_info(bdev); - if (bdi == NULL) - return -ENOTTY; return put_long(arg, (bdi->ra_pages * PAGE_CACHE_SIZE) / 512); case BLKROGET: return put_int(arg, bdev_read_only(bdev) != 0); @@ -386,8 +384,6 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, if(!capable(CAP_SYS_ADMIN)) return -EACCES; bdi = blk_get_backing_dev_info(bdev); - if (bdi == NULL) - return -ENOTTY; bdi->ra_pages = (arg * 512) / PAGE_CACHE_SIZE; return 0; case BLKBSZSET: diff --git a/fs/block_dev.c b/fs/block_dev.c index 6d7274619bf9..d3251eca6429 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1173,8 +1173,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) if (!ret) { bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); bdi = blk_get_backing_dev_info(bdev); - if (bdi == NULL) - bdi = &default_backing_dev_info; bdev_inode_switch_bdi(bdev->bd_inode, bdi); } diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index d0ed9e664f7d..39ff591ae1b4 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1694,7 +1694,7 @@ static int btrfs_congested_fn(void *congested_data, int bdi_bits) if (!device->bdev) continue; bdi = blk_get_backing_dev_info(device->bdev); - if (bdi && bdi_congested(bdi, bdi_bits)) { + if (bdi_congested(bdi, bdi_bits)) { ret = 1; break; } diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c index cd7b8ca9b064..497fcde381d7 100644 --- 
a/fs/xfs/xfs_buf.c +++ b/fs/xfs/xfs_buf.c @@ -1678,8 +1678,6 @@ xfs_alloc_buftarg( btp->bt_dev = bdev->bd_dev; btp->bt_bdev = bdev; btp->bt_bdi = blk_get_backing_dev_info(bdev); - if (!btp->bt_bdi) - goto error; if (xfs_setsize_buftarg_early(btp, bdev)) goto error; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 518b46555b80..e267bf0db559 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -865,7 +865,7 @@ extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, static inline struct request_queue *bdev_get_queue(struct block_device *bdev) { - return bdev->bd_disk->queue; + return bdev->bd_disk->queue; /* this is never NULL */ } /* -- cgit v1.2.3 From e36f1dfce0b45d347927568efe1088821758cc3c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 08:03:57 +0900 Subject: bdi: remove unused stuff Two flags and one bdi_writeback field are no longer used. Remove them. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index e488e9459a93..2103a7fa3fd8 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -28,12 +28,10 @@ struct dentry; * Bits in backing_dev_info.state */ enum bdi_state { - BDI_wb_alloc, /* Default embedded wb allocated */ BDI_async_congested, /* The async (write) queue is getting full */ BDI_sync_congested, /* The sync queue is getting full */ BDI_registered, /* bdi_register() was done */ BDI_writeback_running, /* Writeback is in progress */ - BDI_unused, /* Available bits start here */ }; typedef int (congested_fn)(void *, int); @@ -50,7 +48,6 @@ enum bdi_stat_item { struct bdi_writeback { struct backing_dev_info *bdi; /* our parent bdi */ - unsigned int nr; unsigned long last_old_flush; /* last old data flush */ -- cgit v1.2.3 From 018a17bdc8658ad448497c84d4ba21b6985820ec Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 08:04:01 +0900 Subject: bdi: reimplement bdev_inode_switch_bdi() A block_device may be attached to different gendisks and thus different bdis over time. bdev_inode_switch_bdi() is used to switch the associated bdi. The function assumes that the inode could be dirty and transfers it between bdis if so. This is a bit nasty in that it reaches into bdi internals. This patch reimplements the function so that it writes out the inode if dirty. This is a lot simpler and can be implemented without exposing bdi internals. Signed-off-by: Tejun Heo Cc: Alexander Viro Signed-off-by: Jens Axboe --- fs/block_dev.c | 32 +++++++++++--------------------- include/linux/backing-dev.h | 1 - mm/backing-dev.c | 2 +- 3 files changed, 12 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/fs/block_dev.c b/fs/block_dev.c index d3251eca6429..cc8d68ac29aa 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -50,32 +50,22 @@ inline struct block_device *I_BDEV(struct inode *inode) EXPORT_SYMBOL(I_BDEV); /* - * Move the inode from its current bdi to a new bdi. If the inode is dirty we - * need to move it onto the dirty list of @dst so that the inode is always on - * the right list. + * Move the inode from its current bdi to a new bdi. Make sure the inode + * is clean before moving so that it doesn't linger on the old bdi. 
*/ static void bdev_inode_switch_bdi(struct inode *inode, struct backing_dev_info *dst) { - struct backing_dev_info *old = inode->i_data.backing_dev_info; - bool wakeup_bdi = false; - - if (unlikely(dst == old)) /* deadlock avoidance */ - return; - bdi_lock_two(&old->wb, &dst->wb); - spin_lock(&inode->i_lock); - inode->i_data.backing_dev_info = dst; - if (inode->i_state & I_DIRTY) { - if (bdi_cap_writeback_dirty(dst) && !wb_has_dirty_io(&dst->wb)) - wakeup_bdi = true; - list_move(&inode->i_wb_list, &dst->wb.b_dirty); + while (true) { + spin_lock(&inode->i_lock); + if (!(inode->i_state & I_DIRTY)) { + inode->i_data.backing_dev_info = dst; + spin_unlock(&inode->i_lock); + return; + } + spin_unlock(&inode->i_lock); + WARN_ON_ONCE(write_inode_now(inode, true)); } - spin_unlock(&inode->i_lock); - spin_unlock(&old->wb.list_lock); - spin_unlock(&dst->wb.list_lock); - - if (wakeup_bdi) - bdi_wakeup_thread_delayed(dst); } /* Kill _all_ buffers and pagecache , dirty or not.. */ diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 2103a7fa3fd8..5da6012b7a14 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -121,7 +121,6 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi); void bdi_writeback_workfn(struct work_struct *work); int bdi_has_dirty_io(struct backing_dev_info *bdi); void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi); -void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2); extern spinlock_t bdi_lock; extern struct list_head bdi_list; diff --git a/mm/backing-dev.c b/mm/backing-dev.c index b65fe93ad612..7d63d5e9d3de 100644 --- a/mm/backing-dev.c +++ b/mm/backing-dev.c @@ -40,7 +40,7 @@ LIST_HEAD(bdi_list); /* bdi_wq serves all asynchronous writeback tasks */ struct workqueue_struct *bdi_wq; -void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2) +static void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2) { if (wb1 < wb2) { spin_lock(&wb1->list_lock); -- cgit v1.2.3 From e676253b19b2d269cccf67fdb1592120a0cd0676 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Delgado Date: Tue, 5 Aug 2014 11:45:59 +0200 Subject: serial/8250: Add support for RS485 IOCTLs This patch allows the users of the 8250 infrastructure to define a handler for RS485 configuration. If no handler is defined, the 8250 driver will work as usual.
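A driver built on the 8250 core would hook into this roughly as follows. This is a hypothetical sketch: foo_rs485_config and the register programming are placeholders; only the rs485_config field, struct serial_rs485 and serial8250_register_8250_port() are real interfaces here:

        static int foo_rs485_config(struct uart_8250_port *up,
                                    struct serial_rs485 *rs485)
        {
                /* program the hardware's RS485 direction control from *rs485 */
                return 0;
        }

        struct uart_8250_port uart = { /* port setup elided */ };

        uart.rs485_config = foo_rs485_config;
        serial8250_register_8250_port(&uart);

Userspace can then issue the standard TIOCSRS485/TIOCGRS485 ioctls against the port, and the core stores the accepted configuration in up->rs485.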
Signed-off-by: Ricardo Ribalda Delgado Acked-by: Alan Cox -- v2:Change suggested by Alan "One Thousand Gnomes": - Move rs485 structure further down on the uart_8250_port structure drivers/tty/serial/8250/8250_core.c | 39 +++++++++++++++++++++++++++++++++++++ include/linux/serial_8250.h | 3 +++ 2 files changed, 42 insertions(+) Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_core.c | 39 +++++++++++++++++++++++++++++++++++++ include/linux/serial_8250.h | 3 +++ 2 files changed, 42 insertions(+) (limited to 'include/linux') diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 1d42dba6121d..b28ed1beb50f 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -2843,6 +2843,42 @@ serial8250_verify_port(struct uart_port *port, struct serial_struct *ser) return 0; } +static int serial8250_ioctl(struct uart_port *port, unsigned int cmd, + unsigned long arg) +{ + struct uart_8250_port *up = + container_of(port, struct uart_8250_port, port); + int ret; + struct serial_rs485 rs485_config; + + if (!up->rs485_config) + return -ENOIOCTLCMD; + + switch (cmd) { + case TIOCSRS485: + if (copy_from_user(&rs485_config, (void __user *)arg, + sizeof(rs485_config))) + return -EFAULT; + + ret = up->rs485_config(up, &rs485_config); + if (ret) + return ret; + + memcpy(&up->rs485, &rs485_config, sizeof(rs485_config)); + + return 0; + case TIOCGRS485: + if (copy_to_user((void __user *)arg, &up->rs485, + sizeof(up->rs485))) + return -EFAULT; + return 0; + default: + break; + } + + return -ENOIOCTLCMD; +} + static const char * serial8250_type(struct uart_port *port) { @@ -2872,6 +2908,7 @@ static struct uart_ops serial8250_pops = { .request_port = serial8250_request_port, .config_port = serial8250_config_port, .verify_port = serial8250_verify_port, + .ioctl = serial8250_ioctl, #ifdef CONFIG_CONSOLE_POLL .poll_get_char = serial8250_get_poll_char, .poll_put_char = serial8250_put_poll_char, @@ -3388,6 +3425,8 @@ int serial8250_register_8250_port(struct uart_8250_port *up) uart->port.fifosize = up->port.fifosize; uart->tx_loadsz = up->tx_loadsz; uart->capabilities = up->capabilities; + uart->rs485_config = up->rs485_config; + uart->rs485 = up->rs485; /* Take tx_loadsz from fifosize if it wasn't set separately */ if (uart->port.fifosize && !uart->tx_loadsz) diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index f93649e22c43..6dd671765312 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -96,10 +96,13 @@ struct uart_8250_port { unsigned char msr_saved_flags; struct uart_8250_dma *dma; + struct serial_rs485 rs485; /* 8250 specific callbacks */ int (*dl_read)(struct uart_8250_port *); void (*dl_write)(struct uart_8250_port *, int); + int (*rs485_config)(struct uart_8250_port *, + struct serial_rs485 *rs485); }; static inline struct uart_8250_port *up_to_u8250p(struct uart_port *up) -- cgit v1.2.3 From 55e15c949fd05d247a889df0ed0177a676fec665 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 3 Sep 2014 12:52:17 +0200 Subject: PM / domains: Remove the pm_genpd_add|remove_callbacks APIs There are no users of these APIs. To simplify the generic power domain let's remove them. Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. 
Wysocki --- drivers/base/power/domain.c | 106 -------------------------------------------- include/linux/pm_domain.h | 19 -------- 2 files changed, 125 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index eee55c1e5fde..e613e3cb96d8 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1743,112 +1743,6 @@ int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, return ret; } -/** - * pm_genpd_add_callbacks - Add PM domain callbacks to a given device. - * @dev: Device to add the callbacks to. - * @ops: Set of callbacks to add. - * @td: Timing data to add to the device along with the callbacks (optional). - * - * Every call to this routine should be balanced with a call to - * __pm_genpd_remove_callbacks() and they must not be nested. - */ -int pm_genpd_add_callbacks(struct device *dev, struct gpd_dev_ops *ops, - struct gpd_timing_data *td) -{ - struct generic_pm_domain_data *gpd_data_new, *gpd_data = NULL; - int ret = 0; - - if (!(dev && ops)) - return -EINVAL; - - gpd_data_new = __pm_genpd_alloc_dev_data(dev); - if (!gpd_data_new) - return -ENOMEM; - - pm_runtime_disable(dev); - device_pm_lock(); - - ret = dev_pm_get_subsys_data(dev); - if (ret) - goto out; - - spin_lock_irq(&dev->power.lock); - - if (dev->power.subsys_data->domain_data) { - gpd_data = to_gpd_data(dev->power.subsys_data->domain_data); - } else { - gpd_data = gpd_data_new; - dev->power.subsys_data->domain_data = &gpd_data->base; - } - gpd_data->refcount++; - gpd_data->ops = *ops; - if (td) - gpd_data->td = *td; - - spin_unlock_irq(&dev->power.lock); - - out: - device_pm_unlock(); - pm_runtime_enable(dev); - - if (gpd_data != gpd_data_new) - __pm_genpd_free_dev_data(dev, gpd_data_new); - - return ret; -} -EXPORT_SYMBOL_GPL(pm_genpd_add_callbacks); - -/** - * __pm_genpd_remove_callbacks - Remove PM domain callbacks from a given device. - * @dev: Device to remove the callbacks from. - * @clear_td: If set, clear the device's timing data too. - * - * This routine can only be called after pm_genpd_add_callbacks(). - */ -int __pm_genpd_remove_callbacks(struct device *dev, bool clear_td) -{ - struct generic_pm_domain_data *gpd_data = NULL; - bool remove = false; - int ret = 0; - - if (!(dev && dev->power.subsys_data)) - return -EINVAL; - - pm_runtime_disable(dev); - device_pm_lock(); - - spin_lock_irq(&dev->power.lock); - - if (dev->power.subsys_data->domain_data) { - gpd_data = to_gpd_data(dev->power.subsys_data->domain_data); - gpd_data->ops = (struct gpd_dev_ops){ NULL }; - if (clear_td) - gpd_data->td = (struct gpd_timing_data){ 0 }; - - if (--gpd_data->refcount == 0) { - dev->power.subsys_data->domain_data = NULL; - remove = true; - } - } else { - ret = -EINVAL; - } - - spin_unlock_irq(&dev->power.lock); - - device_pm_unlock(); - pm_runtime_enable(dev); - - if (ret) - return ret; - - dev_pm_put_subsys_data(dev); - if (remove) - __pm_genpd_free_dev_data(dev, gpd_data); - - return 0; -} -EXPORT_SYMBOL_GPL(__pm_genpd_remove_callbacks); - /** * pm_genpd_attach_cpuidle - Connect the given PM domain with cpuidle. * @genpd: PM domain to be connected with cpuidle. 
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index ebc4c76ffb73..9ae2b9e88d61 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -151,10 +151,6 @@ extern int pm_genpd_add_subdomain_names(const char *master_name, const char *subdomain_name); extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *target); -extern int pm_genpd_add_callbacks(struct device *dev, - struct gpd_dev_ops *ops, - struct gpd_timing_data *td); -extern int __pm_genpd_remove_callbacks(struct device *dev, bool clear_td); extern int pm_genpd_attach_cpuidle(struct generic_pm_domain *genpd, int state); extern int pm_genpd_name_attach_cpuidle(const char *name, int state); extern int pm_genpd_detach_cpuidle(struct generic_pm_domain *genpd); @@ -217,16 +213,6 @@ static inline int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, { return -ENOSYS; } -static inline int pm_genpd_add_callbacks(struct device *dev, - struct gpd_dev_ops *ops, - struct gpd_timing_data *td) -{ - return -ENOSYS; -} -static inline int __pm_genpd_remove_callbacks(struct device *dev, bool clear_td) -{ - return -ENOSYS; -} static inline int pm_genpd_attach_cpuidle(struct generic_pm_domain *genpd, int st) { return -ENOSYS; @@ -281,11 +267,6 @@ static inline int pm_genpd_name_add_device(const char *domain_name, return __pm_genpd_name_add_device(domain_name, dev, NULL); } -static inline int pm_genpd_remove_callbacks(struct device *dev) -{ - return __pm_genpd_remove_callbacks(dev, true); -} - #ifdef CONFIG_PM_GENERIC_DOMAINS_RUNTIME extern void genpd_queue_power_off_work(struct generic_pm_domain *genpd); extern void pm_genpd_poweroff_unused(void); -- cgit v1.2.3 From 67da6d4bf43c4208433ef8f3ee487401b4dc9c74 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 3 Sep 2014 12:52:18 +0200 Subject: PM / domains: Ignore callbacks for subsys generic_pm_domain_data In a step of simplifying the generic power domain let's move away from using these callbacks. Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 131 +++----------------------------------------- include/linux/pm_domain.h | 1 - 2 files changed, 8 insertions(+), 124 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index e613e3cb96d8..aa5b14c1e643 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -25,10 +25,6 @@ __routine = genpd->dev_ops.callback; \ if (__routine) { \ __ret = __routine(dev); \ - } else { \ - __routine = dev_gpd_data(dev)->ops.callback; \ - if (__routine) \ - __ret = __routine(dev); \ } \ __ret; \ }) @@ -1871,10 +1867,6 @@ static int pm_genpd_default_save_state(struct device *dev) { int (*cb)(struct device *__dev); - cb = dev_gpd_data(dev)->ops.save_state; - if (cb) - return cb(dev); - if (dev->type && dev->type->pm) cb = dev->type->pm->runtime_suspend; else if (dev->class && dev->class->pm) @@ -1898,10 +1890,6 @@ static int pm_genpd_default_restore_state(struct device *dev) { int (*cb)(struct device *__dev); - cb = dev_gpd_data(dev)->ops.restore_state; - if (cb) - return cb(dev); - if (dev->type && dev->type->pm) cb = dev->type->pm->runtime_resume; else if (dev->class && dev->class->pm) @@ -1917,109 +1905,6 @@ static int pm_genpd_default_restore_state(struct device *dev) return cb ? cb(dev) : 0; } -#ifdef CONFIG_PM_SLEEP - -/** - * pm_genpd_default_suspend - Default "device suspend" for PM domians. - * @dev: Device to handle. 
- */ -static int pm_genpd_default_suspend(struct device *dev) -{ - int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.suspend; - - return cb ? cb(dev) : pm_generic_suspend(dev); -} - -/** - * pm_genpd_default_suspend_late - Default "late device suspend" for PM domians. - * @dev: Device to handle. - */ -static int pm_genpd_default_suspend_late(struct device *dev) -{ - int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.suspend_late; - - return cb ? cb(dev) : pm_generic_suspend_late(dev); -} - -/** - * pm_genpd_default_resume_early - Default "early device resume" for PM domians. - * @dev: Device to handle. - */ -static int pm_genpd_default_resume_early(struct device *dev) -{ - int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.resume_early; - - return cb ? cb(dev) : pm_generic_resume_early(dev); -} - -/** - * pm_genpd_default_resume - Default "device resume" for PM domians. - * @dev: Device to handle. - */ -static int pm_genpd_default_resume(struct device *dev) -{ - int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.resume; - - return cb ? cb(dev) : pm_generic_resume(dev); -} - -/** - * pm_genpd_default_freeze - Default "device freeze" for PM domians. - * @dev: Device to handle. - */ -static int pm_genpd_default_freeze(struct device *dev) -{ - int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.freeze; - - return cb ? cb(dev) : pm_generic_freeze(dev); -} - -/** - * pm_genpd_default_freeze_late - Default "late device freeze" for PM domians. - * @dev: Device to handle. - */ -static int pm_genpd_default_freeze_late(struct device *dev) -{ - int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.freeze_late; - - return cb ? cb(dev) : pm_generic_freeze_late(dev); -} - -/** - * pm_genpd_default_thaw_early - Default "early device thaw" for PM domians. - * @dev: Device to handle. - */ -static int pm_genpd_default_thaw_early(struct device *dev) -{ - int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.thaw_early; - - return cb ? cb(dev) : pm_generic_thaw_early(dev); -} - -/** - * pm_genpd_default_thaw - Default "device thaw" for PM domians. - * @dev: Device to handle. - */ -static int pm_genpd_default_thaw(struct device *dev) -{ - int (*cb)(struct device *__dev) = dev_gpd_data(dev)->ops.thaw; - - return cb ? cb(dev) : pm_generic_thaw(dev); -} - -#else /* !CONFIG_PM_SLEEP */ - -#define pm_genpd_default_suspend NULL -#define pm_genpd_default_suspend_late NULL -#define pm_genpd_default_resume_early NULL -#define pm_genpd_default_resume NULL -#define pm_genpd_default_freeze NULL -#define pm_genpd_default_freeze_late NULL -#define pm_genpd_default_thaw_early NULL -#define pm_genpd_default_thaw NULL - -#endif /* !CONFIG_PM_SLEEP */ - /** * pm_genpd_init - Initialize a generic I/O PM domain object. * @genpd: PM domain object to initialize. 
@@ -2071,14 +1956,14 @@ void pm_genpd_init(struct generic_pm_domain *genpd, genpd->domain.ops.complete = pm_genpd_complete; genpd->dev_ops.save_state = pm_genpd_default_save_state; genpd->dev_ops.restore_state = pm_genpd_default_restore_state; - genpd->dev_ops.suspend = pm_genpd_default_suspend; - genpd->dev_ops.suspend_late = pm_genpd_default_suspend_late; - genpd->dev_ops.resume_early = pm_genpd_default_resume_early; - genpd->dev_ops.resume = pm_genpd_default_resume; - genpd->dev_ops.freeze = pm_genpd_default_freeze; - genpd->dev_ops.freeze_late = pm_genpd_default_freeze_late; - genpd->dev_ops.thaw_early = pm_genpd_default_thaw_early; - genpd->dev_ops.thaw = pm_genpd_default_thaw; + genpd->dev_ops.suspend = pm_generic_suspend; + genpd->dev_ops.suspend_late = pm_generic_suspend_late; + genpd->dev_ops.resume_early = pm_generic_resume_early; + genpd->dev_ops.resume = pm_generic_resume; + genpd->dev_ops.freeze = pm_generic_freeze; + genpd->dev_ops.freeze_late = pm_generic_freeze_late; + genpd->dev_ops.thaw_early = pm_generic_thaw_early; + genpd->dev_ops.thaw = pm_generic_thaw; mutex_lock(&gpd_list_lock); list_add(&genpd->gpd_list_node, &gpd_list); mutex_unlock(&gpd_list_lock); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 9ae2b9e88d61..436ea149d40c 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -108,7 +108,6 @@ struct gpd_timing_data { struct generic_pm_domain_data { struct pm_domain_data base; - struct gpd_dev_ops ops; struct gpd_timing_data td; struct notifier_block nb; struct mutex lock; -- cgit v1.2.3 From 1e0407ca54d28db8e5f02e437ff21cc6416c0be8 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 3 Sep 2014 12:52:19 +0200 Subject: PM / domains: Remove system PM callbacks from gpd_dev_ops There are no users of these callbacks; let's simplify the generic power domain by removing them. Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J.
Wysocki --- drivers/base/power/domain.c | 64 ++++++--------------------------------------- include/linux/pm_domain.h | 8 ------ 2 files changed, 8 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index aa5b14c1e643..e777609ccc77 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -770,46 +770,6 @@ static bool genpd_dev_active_wakeup(struct generic_pm_domain *genpd, return GENPD_DEV_CALLBACK(genpd, bool, active_wakeup, dev); } -static int genpd_suspend_dev(struct generic_pm_domain *genpd, struct device *dev) -{ - return GENPD_DEV_CALLBACK(genpd, int, suspend, dev); -} - -static int genpd_suspend_late(struct generic_pm_domain *genpd, struct device *dev) -{ - return GENPD_DEV_CALLBACK(genpd, int, suspend_late, dev); -} - -static int genpd_resume_early(struct generic_pm_domain *genpd, struct device *dev) -{ - return GENPD_DEV_CALLBACK(genpd, int, resume_early, dev); -} - -static int genpd_resume_dev(struct generic_pm_domain *genpd, struct device *dev) -{ - return GENPD_DEV_CALLBACK(genpd, int, resume, dev); -} - -static int genpd_freeze_dev(struct generic_pm_domain *genpd, struct device *dev) -{ - return GENPD_DEV_CALLBACK(genpd, int, freeze, dev); -} - -static int genpd_freeze_late(struct generic_pm_domain *genpd, struct device *dev) -{ - return GENPD_DEV_CALLBACK(genpd, int, freeze_late, dev); -} - -static int genpd_thaw_early(struct generic_pm_domain *genpd, struct device *dev) -{ - return GENPD_DEV_CALLBACK(genpd, int, thaw_early, dev); -} - -static int genpd_thaw_dev(struct generic_pm_domain *genpd, struct device *dev) -{ - return GENPD_DEV_CALLBACK(genpd, int, thaw, dev); -} - /** * pm_genpd_sync_poweroff - Synchronously power off a PM domain and its masters. * @genpd: PM domain to power off, if possible. @@ -991,7 +951,7 @@ static int pm_genpd_suspend(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : genpd_suspend_dev(genpd, dev); + return genpd->suspend_power_off ? 0 : pm_generic_suspend(dev); } /** @@ -1012,7 +972,7 @@ static int pm_genpd_suspend_late(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : genpd_suspend_late(genpd, dev); + return genpd->suspend_power_off ? 0 : pm_generic_suspend_late(dev); } /** @@ -1099,7 +1059,7 @@ static int pm_genpd_resume_early(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : genpd_resume_early(genpd, dev); + return genpd->suspend_power_off ? 0 : pm_generic_resume_early(dev); } /** @@ -1120,7 +1080,7 @@ static int pm_genpd_resume(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : genpd_resume_dev(genpd, dev); + return genpd->suspend_power_off ? 0 : pm_generic_resume(dev); } /** @@ -1141,7 +1101,7 @@ static int pm_genpd_freeze(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : genpd_freeze_dev(genpd, dev); + return genpd->suspend_power_off ? 0 : pm_generic_freeze(dev); } /** @@ -1163,7 +1123,7 @@ static int pm_genpd_freeze_late(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : genpd_freeze_late(genpd, dev); + return genpd->suspend_power_off ? 0 : pm_generic_freeze_late(dev); } /** @@ -1227,7 +1187,7 @@ static int pm_genpd_thaw_early(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 
0 : genpd_thaw_early(genpd, dev); + return genpd->suspend_power_off ? 0 : pm_generic_thaw_early(dev); } /** @@ -1248,7 +1208,7 @@ static int pm_genpd_thaw(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - return genpd->suspend_power_off ? 0 : genpd_thaw_dev(genpd, dev); + return genpd->suspend_power_off ? 0 : pm_generic_thaw(dev); } /** @@ -1956,14 +1916,6 @@ void pm_genpd_init(struct generic_pm_domain *genpd, genpd->domain.ops.complete = pm_genpd_complete; genpd->dev_ops.save_state = pm_genpd_default_save_state; genpd->dev_ops.restore_state = pm_genpd_default_restore_state; - genpd->dev_ops.suspend = pm_generic_suspend; - genpd->dev_ops.suspend_late = pm_generic_suspend_late; - genpd->dev_ops.resume_early = pm_generic_resume_early; - genpd->dev_ops.resume = pm_generic_resume; - genpd->dev_ops.freeze = pm_generic_freeze; - genpd->dev_ops.freeze_late = pm_generic_freeze_late; - genpd->dev_ops.thaw_early = pm_generic_thaw_early; - genpd->dev_ops.thaw = pm_generic_thaw; mutex_lock(&gpd_list_lock); list_add(&genpd->gpd_list_node, &gpd_list); mutex_unlock(&gpd_list_lock); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 436ea149d40c..b6343d4b9b04 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -35,14 +35,6 @@ struct gpd_dev_ops { int (*stop)(struct device *dev); int (*save_state)(struct device *dev); int (*restore_state)(struct device *dev); - int (*suspend)(struct device *dev); - int (*suspend_late)(struct device *dev); - int (*resume_early)(struct device *dev); - int (*resume)(struct device *dev); - int (*freeze)(struct device *dev); - int (*freeze_late)(struct device *dev); - int (*thaw_early)(struct device *dev); - int (*thaw)(struct device *dev); bool (*active_wakeup)(struct device *dev); }; -- cgit v1.2.3 From c5d79ec2a5715489cff16a0d1cf4fa9108a5509e Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 3 Sep 2014 12:52:22 +0200 Subject: PM / domains: Remove dev_irq_safe from genpd config The genpd dev_irq_safe configuration somewhat overlaps with the runtime PM pm_runtime_irq_safe() option. Also, genpd currently doesn't have a good way to deal with these devices. So, until we figure out if and how to support this in genpd, let's remove the option to configure it. Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 4 ---- include/linux/pm_domain.h | 1 - 2 files changed, 5 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index e777609ccc77..4298a2bcd228 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -615,8 +615,6 @@ static int pm_genpd_runtime_suspend(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - might_sleep_if(!genpd->dev_irq_safe); - stop_ok = genpd->gov ? genpd->gov->stop_ok : NULL; if (stop_ok && !stop_ok(dev)) return -EBUSY; @@ -661,8 +659,6 @@ static int pm_genpd_runtime_resume(struct device *dev) if (IS_ERR(genpd)) return -EINVAL; - might_sleep_if(!genpd->dev_irq_safe); - /* If power.irq_safe, the PM domain is never powered off.
*/ if (dev->power.irq_safe) return genpd_start_dev_no_timing(genpd, dev); diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index b6343d4b9b04..360c94ceeebb 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -63,7 +63,6 @@ struct generic_pm_domain { unsigned int suspended_count; /* System suspend device counter */ unsigned int prepared_count; /* Suspend counter of prepared devices */ bool suspend_power_off; /* Power status before system suspend */ - bool dev_irq_safe; /* Device callbacks are IRQ-safe */ int (*power_off)(struct generic_pm_domain *domain); s64 power_off_latency_ns; int (*power_on)(struct generic_pm_domain *domain); -- cgit v1.2.3 From d47e6464ae6c96735d4706f5cb0537fe717b6b00 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 3 Sep 2014 12:52:24 +0200 Subject: PM / domains: Remove pm_genpd_syscore_switch() API The pm_genpd_syscore_poweroff() API and pm_genpd_syscore_poweron() API make the pm_genpd_syscore_switch() API redundant. Moreover, since there are no active users, let's just remove it. Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 17 ++++++++++++++--- include/linux/pm_domain.h | 16 ++++------------ 2 files changed, 18 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index b910d0f6ff60..601e35b2fa71 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -1292,13 +1292,13 @@ static void pm_genpd_complete(struct device *dev) } /** - * pm_genpd_syscore_switch - Switch power during system core suspend or resume. + * genpd_syscore_switch - Switch power during system core suspend or resume. * @dev: Device that normally is marked as "always on" to switch power for. * * This routine may only be called during the system core (syscore) suspend or * resume phase for devices whose "always on" flags are set.
*/ -void pm_genpd_syscore_switch(struct device *dev, bool suspend) +static void genpd_syscore_switch(struct device *dev, bool suspend) { struct generic_pm_domain *genpd; @@ -1314,7 +1314,18 @@ void pm_genpd_syscore_switch(struct device *dev, bool suspend) genpd->suspended_count--; } } -EXPORT_SYMBOL_GPL(pm_genpd_syscore_switch); + +void pm_genpd_syscore_poweroff(struct device *dev) +{ + genpd_syscore_switch(dev, true); +} +EXPORT_SYMBOL_GPL(pm_genpd_syscore_poweroff); + +void pm_genpd_syscore_poweron(struct device *dev) +{ + genpd_syscore_switch(dev, false); +} +EXPORT_SYMBOL_GPL(pm_genpd_syscore_poweron); #else diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 360c94ceeebb..3997af691600 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -266,19 +266,11 @@ static inline void pm_genpd_poweroff_unused(void) {} #endif #ifdef CONFIG_PM_GENERIC_DOMAINS_SLEEP -extern void pm_genpd_syscore_switch(struct device *dev, bool suspend); +extern void pm_genpd_syscore_poweroff(struct device *dev); +extern void pm_genpd_syscore_poweron(struct device *dev); #else -static inline void pm_genpd_syscore_switch(struct device *dev, bool suspend) {} +static inline void pm_genpd_syscore_poweroff(struct device *dev) {} +static inline void pm_genpd_syscore_poweron(struct device *dev) {} #endif -static inline void pm_genpd_syscore_poweroff(struct device *dev) -{ - pm_genpd_syscore_switch(dev, true); -} - -static inline void pm_genpd_syscore_poweron(struct device *dev) -{ - pm_genpd_syscore_switch(dev, false); -} - #endif /* _LINUX_PM_DOMAIN_H */ -- cgit v1.2.3 From d971f0b0eaaf3f2086bf21bbd64f7ea7e2f28459 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 3 Sep 2014 12:52:25 +0200 Subject: PM / domains: Remove genpd_queue_power_off_work() API There are no active users of this API. Let's remove it, and if future needs show up we could consider having a get/put API instead. Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain.c | 5 ++++- include/linux/pm_domain.h | 2 -- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c index 601e35b2fa71..d71d0458d41f 100644 --- a/drivers/base/power/domain.c +++ b/drivers/base/power/domain.c @@ -422,7 +422,7 @@ static bool genpd_abort_poweroff(struct generic_pm_domain *genpd) * Queue up the execution of pm_genpd_poweroff() unless it's already been done * before.
-void genpd_queue_power_off_work(struct generic_pm_domain *genpd) +static void genpd_queue_power_off_work(struct generic_pm_domain *genpd) { queue_work(pm_wq, &genpd->power_off_work); } @@ -729,6 +729,9 @@ static inline int genpd_dev_pm_qos_notifier(struct notifier_block *nb, return NOTIFY_DONE; } +static inline void +genpd_queue_power_off_work(struct generic_pm_domain *genpd) {} + static inline void genpd_power_off_work_fn(struct work_struct *work) {} #define pm_genpd_runtime_suspend NULL diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 3997af691600..c19c90b839b8 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -258,10 +258,8 @@ static inline int pm_genpd_name_add_device(const char *domain_name, } #ifdef CONFIG_PM_GENERIC_DOMAINS_RUNTIME -extern void genpd_queue_power_off_work(struct generic_pm_domain *genpd); extern void pm_genpd_poweroff_unused(void); #else -static inline void genpd_queue_power_off_work(struct generic_pm_domain *gpd) {} static inline void pm_genpd_poweroff_unused(void) {} #endif -- cgit v1.2.3 From 0f574d4c3a7a325cbbef28ee738dedca9851e957 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 3 Sep 2014 12:52:30 +0200 Subject: PM / domains: Remove default_stop_ok() API There is currently no need to export default_stop_ok() as an API; instead, let's keep it local to the PM domain governor. Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- drivers/base/power/domain_governor.c | 7 ++----- include/linux/pm_domain.h | 6 ------ 2 files changed, 2 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/base/power/domain_governor.c b/drivers/base/power/domain_governor.c index a089e3bcdfbc..d88a62e104d4 100644 --- a/drivers/base/power/domain_governor.c +++ b/drivers/base/power/domain_governor.c @@ -42,7 +42,7 @@ static int dev_update_qos_constraint(struct device *dev, void *data) * default_stop_ok - Default PM domain governor routine for stopping devices. * @dev: Device to check.
*/ -bool default_stop_ok(struct device *dev) +static bool default_stop_ok(struct device *dev) { struct gpd_timing_data *td = &dev_gpd_data(dev)->td; unsigned long flags; @@ -229,10 +229,7 @@ static bool always_on_power_down_ok(struct dev_pm_domain *domain) #else /* !CONFIG_PM_RUNTIME */ -bool default_stop_ok(struct device *dev) -{ - return false; -} +static inline bool default_stop_ok(struct device *dev) { return false; } #define default_power_down_ok NULL #define always_on_power_down_ok NULL diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index c19c90b839b8..3b59f296e6ec 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -151,8 +151,6 @@ extern void pm_genpd_init(struct generic_pm_domain *genpd, extern int pm_genpd_poweron(struct generic_pm_domain *genpd); extern int pm_genpd_name_poweron(const char *domain_name); -extern bool default_stop_ok(struct device *dev); - extern struct dev_power_governor pm_domain_always_on_gov; #else @@ -231,10 +229,6 @@ static inline int pm_genpd_name_poweron(const char *domain_name) { return -ENOSYS; } -static inline bool default_stop_ok(struct device *dev) -{ - return false; -} #define simple_qos_governor NULL #define pm_domain_always_on_gov NULL #endif -- cgit v1.2.3 From ae3c511c2d72161b11e93866203b59a3a37dfac7 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 3 Sep 2014 12:52:31 +0200 Subject: PM / domains: Keep declaration of dev_power_governors together This is a pure code cleanup in the header file for the PM domain. No functional change. Signed-off-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 3b59f296e6ec..aa03586c94a2 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -117,8 +117,6 @@ static inline struct generic_pm_domain_data *dev_gpd_data(struct device *dev) return to_gpd_data(dev->power.subsys_data->domain_data); } -extern struct dev_power_governor simple_qos_governor; - extern struct generic_pm_domain *dev_to_genpd(struct device *dev); extern int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, @@ -151,6 +149,7 @@ extern void pm_genpd_init(struct generic_pm_domain *genpd, extern int pm_genpd_poweron(struct generic_pm_domain *genpd); extern int pm_genpd_name_poweron(const char *domain_name); +extern struct dev_power_governor simple_qos_governor; extern struct dev_power_governor pm_domain_always_on_gov; #else -- cgit v1.2.3 From 8a949b07e4062cbd07e04e6a47249e69ca65b944 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Tue, 2 Sep 2014 17:39:19 -0400 Subject: serial: core: Document lock requirement for UPF_* flags updates The flags field of struct uart_port can only be safely modified if the port mutex is held; no other lock prevents concurrent changes from corrupting the field. 
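In practice, a safe update looks something like the sketch below. It assumes the uart_port has already been attached to its uart_state by the serial core, so the associated tty_port mutex (the "port mutex" this comment refers to) is reachable; UPF_SHARE_IRQ is just an arbitrary example flag:

        struct tty_port *tport = &uport->state->port;

        mutex_lock(&tport->mutex);
        uport->flags |= UPF_SHARE_IRQ;
        mutex_unlock(&tport->mutex);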
Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index cf3a1e789bf5..8cb267b1fcd5 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -152,6 +152,7 @@ struct uart_port { unsigned long sysrq; /* sysrq timeout */ #endif + /* flags must be updated while holding port mutex */ upf_t flags; #define UPF_FOURPORT ((__force upf_t) (1 << 1)) -- cgit v1.2.3 From b99b121b2aa42e60e5b73fdd3a49863337839c7b Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Fri, 5 Sep 2014 21:02:37 +0200 Subject: tty: serial: 8250_core: allow to overwrite & export serial8250_startup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The OMAP version of the 8250 can actually use serial8250_startup() 1:1. However, it needs to be extended by a wake-up irq which should be requested & enabled at ->startup() time and disabled at ->shutdown() time. v2…v3: properly copy callbacks v1…v2: add shutdown callback Acked-by: Alan Cox Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Greg Kroah-Hartman --- drivers/tty/serial/8250/8250_core.c | 25 +++++++++++++++++++++++-- include/linux/serial_8250.h | 2 ++ include/linux/serial_core.h | 2 ++ 3 files changed, 27 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/tty/serial/8250/8250_core.c b/drivers/tty/serial/8250/8250_core.c index 90e0d5e2b7ee..872c020607a8 100644 --- a/drivers/tty/serial/8250/8250_core.c +++ b/drivers/tty/serial/8250/8250_core.c @@ -1930,7 +1930,7 @@ static void serial8250_put_poll_char(struct uart_port *port, #endif /* CONFIG_CONSOLE_POLL */ -static int serial8250_startup(struct uart_port *port) +int serial8250_do_startup(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); unsigned long flags; @@ -2181,8 +2181,16 @@ dont_test_tx_en: return 0; } +EXPORT_SYMBOL_GPL(serial8250_do_startup); -static void serial8250_shutdown(struct uart_port *port) +static int serial8250_startup(struct uart_port *port) +{ + if (port->startup) + return port->startup(port); + return serial8250_do_startup(port); +} + +void serial8250_do_shutdown(struct uart_port *port) { struct uart_8250_port *up = up_to_u8250p(port); unsigned long flags; @@ -2232,6 +2240,15 @@ static void serial8250_shutdown(struct uart_port *port) if (port->irq) serial_unlink_irq_chain(up); } +EXPORT_SYMBOL_GPL(serial8250_do_shutdown); + +static void serial8250_shutdown(struct uart_port *port) +{ + if (port->shutdown) + port->shutdown(port); + else + serial8250_do_shutdown(port); +} static unsigned int serial8250_get_divisor(struct uart_port *port, unsigned int baud) { @@ -3466,6 +3483,10 @@ int serial8250_register_8250_port(struct uart_8250_port *up) /* Possibly override set_termios call */ if (up->port.set_termios) uart->port.set_termios = up->port.set_termios; + if (up->port.startup) + uart->port.startup = up->port.startup; + if (up->port.shutdown) + uart->port.shutdown = up->port.shutdown; if (up->port.pm) uart->port.pm = up->port.pm; if (up->port.handle_break) diff --git a/include/linux/serial_8250.h b/include/linux/serial_8250.h index 6dd671765312..6fc9d7bee05e 100644 --- a/include/linux/serial_8250.h +++ b/include/linux/serial_8250.h @@ -124,6 +124,8 @@ extern void serial8250_early_out(struct uart_port *port, int offset, int value); extern int setup_early_serial8250_console(char *cmdline); extern void
serial8250_do_set_termios(struct uart_port *port, struct ktermios *termios, struct ktermios *old); +extern int serial8250_do_startup(struct uart_port *port); +extern void serial8250_do_shutdown(struct uart_port *port); extern void serial8250_do_pm(struct uart_port *port, unsigned int state, unsigned int oldstate); extern int fsl8250_handle_irq(struct uart_port *port); diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 8cb267b1fcd5..3bd7d55eebce 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -122,6 +122,8 @@ struct uart_port { void (*set_termios)(struct uart_port *, struct ktermios *new, struct ktermios *old); + int (*startup)(struct uart_port *port); + void (*shutdown)(struct uart_port *port); int (*handle_irq)(struct uart_port *); void (*pm)(struct uart_port *, unsigned int state, unsigned int old); -- cgit v1.2.3 From 413fffc3a1db7f270afdf1ecb35c1edc013acc68 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 28 Aug 2014 11:22:23 +0530 Subject: cpufreq: Add support for per-policy driver data Drivers supporting multiple clusters or multiple 'struct cpufreq_policy' instances may need to keep per-policy data. If the core doesn't provide support for that, they might do it in the most unoptimized way: 'per-cpu' data. This patch adds another field in struct cpufreq_policy: 'driver_data'. It isn't accessed by the core and is for the driver's internal use only. Tested-by: Stephen Boyd Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 7d1955afa62c..138336b6bb04 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -112,6 +112,9 @@ struct cpufreq_policy { spinlock_t transition_lock; wait_queue_head_t transition_wait; struct task_struct *transition_task; /* Task which is doing the transition */ + + /* For cpufreq driver's internal use */ + void *driver_data; }; /* Only for ACPI */ -- cgit v1.2.3 From da3dae54e4ff09886b9a19224c8d9556bb2ba096 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Tue, 9 Sep 2014 01:27:23 +0900 Subject: Documentation: Docbook: Fix generated DocBook/kernel-api.xml This patch fixes spelling typos found in DocBook/kernel-api.xml. Because the file is generated from the source comments, I have to fix the typos in the source code itself. Signed-off-by: Masanari Iida Acked-by: Randy Dunlap Signed-off-by: Jiri Kosina --- block/blk-core.c | 8 ++++---- block/genhd.c | 2 +- include/linux/clk.h | 2 +- include/linux/kfifo.h | 2 +- ipc/util.c | 6 +++--- kernel/auditsc.c | 2 +- lib/bitmap.c | 4 ++-- lib/idr.c | 2 +- lib/vsprintf.c | 2 +- mm/filemap.c | 2 +- security/inode.c | 2 +- 11 files changed, 17 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/block/blk-core.c b/block/blk-core.c index 6f8dba161bfe..ce5a389cea94 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -240,7 +240,7 @@ EXPORT_SYMBOL(blk_stop_queue); * this function. * * This function does not cancel any asynchronous activity arising - * out of elevator or throttling code. That would require elevaotor_exit() + * out of elevator or throttling code. That would require elevator_exit() * and blkcg_exit_queue() to be called with queue lock initialized. * */ @@ -930,7 +930,7 @@ static struct io_context *rq_ioc(struct bio *bio) * Get a free request from @q. This function may fail under memory * pressure or if @q is dead.
* - * Must be callled with @q->queue_lock held and, + * Must be called with @q->queue_lock held and, * Returns %NULL on failure, with @q->queue_lock held. * Returns !%NULL on success, with @q->queue_lock *not held*. */ @@ -1107,7 +1107,7 @@ rq_starved: * Get a free request from @q. If %__GFP_WAIT is set in @gfp_mask, this * function keeps retrying under memory pressure and fails iff @q is dead. * - * Must be callled with @q->queue_lock held and, + * Must be called with @q->queue_lock held and, * Returns %NULL on failure, with @q->queue_lock held. * Returns !%NULL on success, with @q->queue_lock *not held*. */ @@ -1238,7 +1238,7 @@ struct request *blk_make_request(struct request_queue *q, struct bio *bio, EXPORT_SYMBOL(blk_make_request); /** - * blk_rq_set_block_pc - initialize a requeest to type BLOCK_PC + * blk_rq_set_block_pc - initialize a request to type BLOCK_PC * @rq: request to be initialized * */ diff --git a/block/genhd.c b/block/genhd.c index 791f41943132..048e2cee43d2 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1543,7 +1543,7 @@ void disk_flush_events(struct gendisk *disk, unsigned int mask) /** * disk_clear_events - synchronously check, clear and return pending events * @disk: disk to fetch and clear events from - * @mask: mask of events to be fetched and clearted + * @mask: mask of events to be fetched and cleared * * Disk events are synchronously checked and pending events in @mask * are cleared and returned. This ignores the block count. diff --git a/include/linux/clk.h b/include/linux/clk.h index fb5e097d8f72..afb44bfaf8d1 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -238,7 +238,7 @@ void clk_put(struct clk *clk); /** * devm_clk_put - "free" a managed clock source - * @dev: device used to acuqire the clock + * @dev: device used to acquire the clock * @clk: clock source acquired with devm_clk_get() * * Note: drivers must ensure that all clk_enable calls made on this diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 554fde3a3927..473b43678ad1 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -722,7 +722,7 @@ __kfifo_uint_must_check_helper( \ /** * kfifo_dma_out_finish - finish a DMA OUT operation * @fifo: address of the fifo to be used - * @len: number of bytes transferrd + * @len: number of bytes transferred * * This macro finish a DMA OUT operation. The out counter will be updated by * the len parameter. No error checking will be done. 
diff --git a/ipc/util.c b/ipc/util.c index 27d74e69fd57..d73b7af581e2 100644 --- a/ipc/util.c +++ b/ipc/util.c @@ -309,7 +309,7 @@ int ipc_addid(struct ipc_ids *ids, struct kern_ipc_perm *new, int size) /** * ipcget_new - create a new ipc object * @ns: ipc namespace - * @ids: ipc identifer set + * @ids: ipc identifier set * @ops: the actual creation routine to call * @params: its parameters * @@ -363,7 +363,7 @@ static int ipc_check_perms(struct ipc_namespace *ns, /** * ipcget_public - get an ipc object or create a new one * @ns: ipc namespace - * @ids: ipc identifer set + * @ids: ipc identifier set * @ops: the actual creation routine to call * @params: its parameters * @@ -669,7 +669,7 @@ out: /** * ipcget - Common sys_*get() code - * @ns: namsepace + * @ns: namespace * @ids: ipc identifier set * @ops: operations to be called on ipc object creation, permission checks * and further checks diff --git a/kernel/auditsc.c b/kernel/auditsc.c index 21eae3c05ec0..7208c1df248d 100644 --- a/kernel/auditsc.c +++ b/kernel/auditsc.c @@ -2406,7 +2406,7 @@ int __audit_log_bprm_fcaps(struct linux_binprm *bprm, * @new: the new credentials * @old: the old (current) credentials * - * Record the aguments userspace sent to sys_capset for later printing by the + * Record the arguments userspace sent to sys_capset for later printing by the * audit system if applicable */ void __audit_log_capset(const struct cred *new, const struct cred *old) diff --git a/lib/bitmap.c b/lib/bitmap.c index 06f7e4fe8d2d..ed0f00c3da0a 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -882,7 +882,7 @@ EXPORT_SYMBOL(bitmap_bitremap); * read it, you're overqualified for your current job.) * * In other words, @orig is mapped onto (surjectively) @dst, - * using the the map { | the n-th bit of @relmap is the + * using the map { | the n-th bit of @relmap is the * m-th set bit of @relmap }. * * Any set bits in @orig above bit number W, where W is the @@ -930,7 +930,7 @@ EXPORT_SYMBOL(bitmap_bitremap); * * Further lets say we use the following code, invoking * bitmap_fold() then bitmap_onto, as suggested above to - * avoid the possitility of an empty @dst result: + * avoid the possibility of an empty @dst result: * * unsigned long *tmp; // a temporary bitmap's bits * diff --git a/lib/idr.c b/lib/idr.c index 39158abebad1..460abfd1450b 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -625,7 +625,7 @@ static void __idr_remove_all(struct idr *idp) * idr_destroy(). * * A typical clean-up sequence for objects stored in an idr tree will use - * idr_for_each() to free all objects, if necessay, then idr_destroy() to + * idr_for_each() to free all objects, if necessary, then idr_destroy() to * free up the id mappings and cached idr_layers. */ void idr_destroy(struct idr *idp) diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 6fe2c84eb055..ba3cd0a35640 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1937,7 +1937,7 @@ EXPORT_SYMBOL(sprintf); * @args: Arguments for the format string * * The format follows C99 vsnprintf, except %n is ignored, and its argument - * is skiped. + * is skipped. * * The return value is the number of words(32bits) which would be generated for * the given input. diff --git a/mm/filemap.c b/mm/filemap.c index 65d44fd88c78..f092654cc1a3 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -720,7 +720,7 @@ EXPORT_SYMBOL_GPL(add_page_wait_queue); * * Unlocks the page and wakes up sleepers in ___wait_on_page_locked(). 
* Also wakes sleepers in wait_on_page_writeback() because the wakeup - * mechananism between PageLocked pages and PageWriteback pages is shared. + * mechanism between PageLocked pages and PageWriteback pages is shared. * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep. * * The mb is necessary to enforce ordering between the clear_bit and the read diff --git a/security/inode.c b/security/inode.c index 43ce6e19015f..8e7ca62078ab 100644 --- a/security/inode.c +++ b/security/inode.c @@ -74,7 +74,7 @@ static struct file_system_type fs_type = { * pointer must be passed to the securityfs_remove() function when the file is * to be removed (no automatic cleanup happens if your module is unloaded, * you are responsible here). If an error occurs, the function will return - * the erorr value (via ERR_PTR). + * the error value (via ERR_PTR). * * If securityfs is not enabled in the kernel, the value %-ENODEV is * returned. -- cgit v1.2.3 From 8ca28610e5e37193cd61fefa4310941e28de10ca Mon Sep 17 00:00:00 2001 From: Hans-Christian Egtvedt Date: Thu, 7 Aug 2014 15:14:06 +0200 Subject: mmc: include linux/types.h for bool definition in atmel-mci.h This patch adds an include of linux/types.h to make sure bool is defined before it is used in this header file. Signed-off-by: Hans-Christian Egtvedt Acked-by: Ludovic Desroches Signed-off-by: Ulf Hansson --- include/linux/atmel-mci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/atmel-mci.h b/include/linux/atmel-mci.h index 4c7a4b2104bf..91b77f8d495d 100644 --- a/include/linux/atmel-mci.h +++ b/include/linux/atmel-mci.h @@ -1,6 +1,8 @@ #ifndef __LINUX_ATMEL_MCI_H #define __LINUX_ATMEL_MCI_H +#include <linux/types.h> + #define ATMCI_MAX_NR_SLOTS 2 /** -- cgit v1.2.3 From b3683994843a0ede0e19daccd1ac32a46b21eb39 Mon Sep 17 00:00:00 2001 From: Yi Sun Date: Wed, 13 Aug 2014 13:34:01 +0800 Subject: mmc: Correct the value of MMC_NUM_PHY_PARTITION An eMMC card can support up to 7 physical partitions, including 2 boot, 1 RPMB and 4 GPs. Change MMC_NUM_PHY_PARTITION from 6 to 7, which is the correct value. Signed-off-by: Yi Sun Signed-off-by: Ulf Hansson --- include/linux/mmc/card.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index d424b9de3aff..bde5147a4221 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -214,11 +214,12 @@ enum mmc_blk_status { }; /* The number of MMC physical partitions. These consist of: - * boot partitions (2), general purpose partitions (4) in MMC v4.4. + * boot partitions (2), general purpose partitions (4) and + * RPMB partition (1) in MMC v4.4. */ #define MMC_NUM_BOOT_PARTITION 2 #define MMC_NUM_GP_PARTITION 4 -#define MMC_NUM_PHY_PARTITION 6 +#define MMC_NUM_PHY_PARTITION 7 #define MAX_MMC_PART_NAME_LEN 20 /* -- cgit v1.2.3 From 3d705d14fe4c72be83bae1610680e209ee226b9d Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 19 Aug 2014 10:45:51 +0200 Subject: mmc: implement Driver Stage Register handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some eMMC and SD cards implement a DSR register that allows tuning the rise/fall times and drive strength of the CMD and DATA outputs. The values to use depend on the card in use and the host. It might be necessary to reduce the drive strength to prevent voltage peaks above the host's specification.
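For illustration, a board could use the 'dsr' binding described below like this (a made-up device-tree snippet; the node label and the 0x0404 value are examples only, not recommendations):

&mmc0 {
	/* program the card's optional 16-bit DSR during init */
	dsr = <0x0404>;
};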
Implement a 'dsr' devicetree property that allows specifying the value to set the DSR to. For non-dt setups the new members of mmc_host can be set by board code. This patch was initially authored by Sascha Hauer. It contains improvements authored by Markus Niebel and Uwe Kleine-König. Signed-off-by: Sascha Hauer Signed-off-by: Markus Niebel Signed-off-by: Uwe Kleine-König Signed-off-by: Ulf Hansson --- Documentation/devicetree/bindings/mmc/mmc.txt | 2 ++ drivers/mmc/core/host.c | 8 ++++++++ drivers/mmc/core/mmc.c | 8 ++++++++ drivers/mmc/core/mmc_ops.c | 20 ++++++++++++++++++++ drivers/mmc/core/mmc_ops.h | 1 + drivers/mmc/core/sd.c | 8 ++++++++ include/linux/mmc/card.h | 3 ++- include/linux/mmc/host.h | 3 +++ 8 files changed, 52 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/Documentation/devicetree/bindings/mmc/mmc.txt b/Documentation/devicetree/bindings/mmc/mmc.txt index 431716e37a39..b52628b18a53 100644 --- a/Documentation/devicetree/bindings/mmc/mmc.txt +++ b/Documentation/devicetree/bindings/mmc/mmc.txt @@ -40,6 +40,8 @@ Optional properties: - mmc-hs200-1_2v: eMMC HS200 mode(1.2V I/O) is supported - mmc-hs400-1_8v: eMMC HS400 mode(1.8V I/O) is supported - mmc-hs400-1_2v: eMMC HS400 mode(1.2V I/O) is supported +- dsr: Value the card's (optional) Driver Stage Register (DSR) should be + programmed with. Valid range: [0 .. 0xffff]. *NOTE* on CD and WP polarity. To use common for all SD/MMC host controllers line polarity properties, we have to fix the meaning of the "normal" and "inverted" diff --git a/drivers/mmc/core/host.c b/drivers/mmc/core/host.c index 95cceae96944..d572b2beb65a 100644 --- a/drivers/mmc/core/host.c +++ b/drivers/mmc/core/host.c @@ -452,6 +452,14 @@ int mmc_of_parse(struct mmc_host *host) if (of_find_property(np, "mmc-hs400-1_2v", &len)) host->caps2 |= MMC_CAP2_HS400_1_2V | MMC_CAP2_HS200_1_2V_SDR; + host->dsr_req = !of_property_read_u32(np, "dsr", &host->dsr); + if (host->dsr_req && (host->dsr & ~0xffff)) { + dev_err(host->parent, + "device tree specified broken value for DSR: 0x%x, ignoring\n", + host->dsr); + host->dsr_req = 0; + } + return 0; out: diff --git a/drivers/mmc/core/mmc.c b/drivers/mmc/core/mmc.c index 1eda8dd8c867..31c43165f8bc 100644 --- a/drivers/mmc/core/mmc.c +++ b/drivers/mmc/core/mmc.c @@ -162,6 +162,7 @@ static int mmc_decode_csd(struct mmc_card *card) csd->read_partial = UNSTUFF_BITS(resp, 79, 1); csd->write_misalign = UNSTUFF_BITS(resp, 78, 1); csd->read_misalign = UNSTUFF_BITS(resp, 77, 1); + csd->dsr_imp = UNSTUFF_BITS(resp, 76, 1); csd->r2w_factor = UNSTUFF_BITS(resp, 26, 3); csd->write_blkbits = UNSTUFF_BITS(resp, 22, 4); csd->write_partial = UNSTUFF_BITS(resp, 21, 1); @@ -1271,6 +1272,13 @@ static int mmc_init_card(struct mmc_host *host, u32 ocr, goto free_card; } + /* + * handling only for cards supporting DSR and hosts requesting + * DSR configuration + */ + if (card->csd.dsr_imp && host->dsr_req) + mmc_set_dsr(host); + /* * Select card, as all following commands rely on that. */ diff --git a/drivers/mmc/core/mmc_ops.c b/drivers/mmc/core/mmc_ops.c index f51b5ba3bbea..ba0275e90617 100644 --- a/drivers/mmc/core/mmc_ops.c +++ b/drivers/mmc/core/mmc_ops.c @@ -93,6 +93,26 @@ int mmc_deselect_cards(struct mmc_host *host) return _mmc_select_card(host, NULL); } +/* + * Write the value specified in the device tree or board code into the optional + * 16 bit Driver Stage Register. This can be used to tune raise/fall times and + * drive strength of the DAT and CMD outputs.
The actual meaning of a given + * value is hardware dependant. + * The presence of the DSR register can be determined from the CSD register, + * bit 76. + */ +int mmc_set_dsr(struct mmc_host *host) +{ + struct mmc_command cmd = {0}; + + cmd.opcode = MMC_SET_DSR; + + cmd.arg = (host->dsr << 16) | 0xffff; + cmd.flags = MMC_RSP_NONE | MMC_CMD_AC; + + return mmc_wait_for_cmd(host, &cmd, MMC_CMD_RETRIES); +} + int mmc_go_idle(struct mmc_host *host) { int err; diff --git a/drivers/mmc/core/mmc_ops.h b/drivers/mmc/core/mmc_ops.h index 80ae9f4e0293..390dac665b2a 100644 --- a/drivers/mmc/core/mmc_ops.h +++ b/drivers/mmc/core/mmc_ops.h @@ -14,6 +14,7 @@ int mmc_select_card(struct mmc_card *card); int mmc_deselect_cards(struct mmc_host *host); +int mmc_set_dsr(struct mmc_host *host); int mmc_go_idle(struct mmc_host *host); int mmc_send_op_cond(struct mmc_host *host, u32 ocr, u32 *rocr); int mmc_all_send_cid(struct mmc_host *host, u32 *cid); diff --git a/drivers/mmc/core/sd.c b/drivers/mmc/core/sd.c index 0c44510bf717..25913889cbaa 100644 --- a/drivers/mmc/core/sd.c +++ b/drivers/mmc/core/sd.c @@ -127,6 +127,7 @@ static int mmc_decode_csd(struct mmc_card *card) csd->read_partial = UNSTUFF_BITS(resp, 79, 1); csd->write_misalign = UNSTUFF_BITS(resp, 78, 1); csd->read_misalign = UNSTUFF_BITS(resp, 77, 1); + csd->dsr_imp = UNSTUFF_BITS(resp, 76, 1); csd->r2w_factor = UNSTUFF_BITS(resp, 26, 3); csd->write_blkbits = UNSTUFF_BITS(resp, 22, 4); csd->write_partial = UNSTUFF_BITS(resp, 21, 1); @@ -953,6 +954,13 @@ static int mmc_sd_init_card(struct mmc_host *host, u32 ocr, mmc_decode_cid(card); } + /* + * handling only for cards supporting DSR and hosts requesting + * DSR configuration + */ + if (card->csd.dsr_imp && host->dsr_req) + mmc_set_dsr(host); + /* * Select card, as all following commands rely on that. 
*/ diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index bde5147a4221..0ea795f5feb9 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -42,7 +42,8 @@ struct mmc_csd { unsigned int read_partial:1, read_misalign:1, write_partial:1, - write_misalign:1; + write_misalign:1, + dsr_imp:1; }; struct mmc_ext_csd { diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 7960424d0bc0..4cbf61476999 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -365,6 +365,9 @@ struct mmc_host { unsigned int slotno; /* used for sdio acpi binding */ + int dsr_req; /* DSR value is valid */ + u32 dsr; /* optional driver stage (DSR) value */ + unsigned long private[0] ____cacheline_aligned; }; -- cgit v1.2.3 From 384b2cbd56a02efb16358ed7c0c039e4afca5ed0 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Sun, 24 Aug 2014 19:58:48 -0700 Subject: mmc: tmio: care about DMA tx/rx addr offset Basically, the SD_BUF0 Tx/Rx addresses are the same on a normal TMIO controller, but they differ on the Renesas R-Car SDHI controller if it uses DMAC (the Rx address needs 0x2000 added to the Tx address). This patch adds a new .dma_rx_offset field and takes care of it. Tested-by: Nguyen Xuan Nui Tested-by: Hiep Cao Minh Acked-by: Laurent Pinchart Acked-by: Ben Dooks Signed-off-by: Kuninori Morimoto Signed-off-by: Ulf Hansson --- drivers/mmc/host/sh_mobile_sdhi.c | 3 +++ drivers/mmc/host/tmio_mmc_dma.c | 2 +- include/linux/mfd/tmio.h | 1 + 3 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c index 2dafe28c8df9..f0818cd0242c 100644 --- a/drivers/mmc/host/sh_mobile_sdhi.c +++ b/drivers/mmc/host/sh_mobile_sdhi.c @@ -39,6 +39,7 @@ struct sh_mobile_sdhi_of_data { unsigned long tmio_flags; unsigned long capabilities; unsigned long capabilities2; + dma_addr_t dma_rx_offset; }; static const struct sh_mobile_sdhi_of_data sh_mobile_sdhi_of_cfg[] = { @@ -56,6 +57,7 @@ static const struct sh_mobile_sdhi_of_data of_rcar_gen2_compatible = { .tmio_flags = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_WRPROTECT_DISABLE, .capabilities = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ, .capabilities2 = MMC_CAP2_NO_MULTI_READ, + .dma_rx_offset = 0x2000, }; static const struct of_device_id sh_mobile_sdhi_of_match[] = { @@ -228,6 +230,7 @@ static int sh_mobile_sdhi_probe(struct platform_device *pdev) mmc_data->flags |= of_data->tmio_flags; mmc_data->capabilities |= of_data->capabilities; mmc_data->capabilities2 |= of_data->capabilities2; + dma_priv->dma_rx_offset = of_data->dma_rx_offset; } /* SD control register space size is 0x100, 0x200 for bus_shift=1 */ diff --git a/drivers/mmc/host/tmio_mmc_dma.c b/drivers/mmc/host/tmio_mmc_dma.c index eb8f1d5c34b1..bd646b041085 100644 --- a/drivers/mmc/host/tmio_mmc_dma.c +++ b/drivers/mmc/host/tmio_mmc_dma.c @@ -312,7 +312,7 @@ void tmio_mmc_request_dma(struct tmio_mmc_host *host, struct tmio_mmc_data *pdat if (pdata->dma->chan_priv_rx) cfg.slave_id = pdata->dma->slave_id_rx; cfg.direction = DMA_DEV_TO_MEM; - cfg.src_addr = cfg.dst_addr; + cfg.src_addr = cfg.dst_addr + pdata->dma->dma_rx_offset; cfg.src_addr_width = DMA_SLAVE_BUSWIDTH_2_BYTES; cfg.dst_addr = 0; ret = dmaengine_slave_config(host->chan_rx, &cfg); diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 8f6f2e91e7ae..777e29b1ad0f 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -96,6 +96,7 @@ struct tmio_mmc_dma { int slave_id_tx; int slave_id_rx; int alignment_shift; + dma_addr_t
dma_rx_offset; bool (*filter)(struct dma_chan *chan, void *arg); }; -- cgit v1.2.3 From b8d11962c2d83c984d5afd091e5b725ad2fd5607 Mon Sep 17 00:00:00 2001 From: Shinobu Uehara Date: Sun, 24 Aug 2014 20:00:25 -0700 Subject: mmc: tmio: control multiple block transfer mode Renesas SDHI has "Multiple Block Transfer Mode" settings in the SD_CMD register which control CMD12 automatically. This patch takes care of it, because the automatic CMD12 is not needed for CMD53 (= SD_IO_RW_EXTENDED). [Kuninori Morimoto: tidied up for upstreaming; enabled this flag for all SH-Mobile/R-Car] Tested-by: Nguyen Xuan Nui Tested-by: Hiep Cao Minh Signed-off-by: Shinobu Uehara Signed-off-by: Kuninori Morimoto Signed-off-by: Ulf Hansson --- drivers/mmc/host/sh_mobile_sdhi.c | 5 +++++ drivers/mmc/host/tmio_mmc_pio.c | 10 ++++++++++ include/linux/mfd/tmio.h | 6 ++++++ 3 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c index f0818cd0242c..4727586b063e 100644 --- a/drivers/mmc/host/sh_mobile_sdhi.c +++ b/drivers/mmc/host/sh_mobile_sdhi.c @@ -225,6 +225,11 @@ static int sh_mobile_sdhi_probe(struct platform_device *pdev) */ mmc_data->flags |= TMIO_MMC_SDIO_IRQ; + /* + * All SDHI have CMD12 controll bit + */ + mmc_data->flags |= TMIO_MMC_HAVE_CMD12_CTRL; + if (of_id && of_id->data) { const struct sh_mobile_sdhi_of_data *of_data = of_id->data; mmc_data->flags |= of_data->tmio_flags; diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c index 3d0ad737abea..c2804827be9f 100644 --- a/drivers/mmc/host/tmio_mmc_pio.c +++ b/drivers/mmc/host/tmio_mmc_pio.c @@ -44,6 +44,7 @@ #include #include #include +#include <linux/mmc/sdio.h> #include #include #include @@ -310,6 +311,7 @@ static void tmio_mmc_done_work(struct work_struct *work) #define TRANSFER_READ 0x1000 #define TRANSFER_MULTI 0x2000 #define SECURITY_CMD 0x4000 +#define NO_CMD12_ISSUE 0x4000 /* TMIO_MMC_HAVE_CMD12_CTRL */ static int tmio_mmc_start_command(struct tmio_mmc_host *host, struct mmc_command *cmd) { @@ -346,6 +348,14 @@ static int tmio_mmc_start_command(struct tmio_mmc_host *host, struct mmc_command if (data->blocks > 1) { sd_ctrl_write16(host, CTL_STOP_INTERNAL_ACTION, 0x100); c |= TRANSFER_MULTI; + + /* + * Disable auto CMD12 at IO_RW_EXTENDED when + * multiple block transfer + */ + if ((host->pdata->flags & TMIO_MMC_HAVE_CMD12_CTRL) && + (cmd->opcode == SD_IO_RW_EXTENDED)) + c |= NO_CMD12_ISSUE; } if (data->flags & MMC_DATA_READ) c |= TRANSFER_READ; diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 777e29b1ad0f..7432d95b08e2 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -83,6 +83,12 @@ */ #define TMIO_MMC_HAVE_HIGH_REG (1 << 6) +/* + * Some controllers have CMD12 automatically + * issue/non-issue register + */ +#define TMIO_MMC_HAVE_CMD12_CTRL (1 << 7) + int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base); int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base); void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state); -- cgit v1.2.3 From 6b98757e53cb0e93b02db4067c14afcb32c90615 Mon Sep 17 00:00:00 2001 From: Shinobu Uehara Date: Sun, 24 Aug 2014 20:00:52 -0700 Subject: mmc: tmio: add TMIO_MMC_SDIO_STATUS_QUIRK Renesas R-Car SDHI should set reserved bits in the CTL_SDIO_STATUS register when writing.
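As a hedged sketch of the reserved-bits rule just described (the 0x0006 mask mirrors the "sdio_status |= 6" in the patch below, but treat the exact register layout as an assumption):

/* illustrative only: ack SDIO IRQs while writing the reserved bits as 1 */
static void example_ack_sdio_status(struct tmio_mmc_host *host, u16 status)
{
	u16 val = status & ~TMIO_SDIO_MASK_ALL;	/* clear handled IRQ bits */

	val |= 0x0006;	/* assumed reserved-bit mask, per the quirk */
	sd_ctrl_write16(host, CTL_SDIO_STATUS, val);
}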
This patch adds a new TMIO_MMC_SDIO_STATUS_QUIRK flag for this purpose. [Kuninori Morimoto: tidied up for upstreaming; enabled this flag for all SH-Mobile/R-Car] Tested-by: Nguyen Xuan Nui Tested-by: Hiep Cao Minh Signed-off-by: Shinobu Uehara Signed-off-by: Kuninori Morimoto Signed-off-by: Ulf Hansson --- drivers/mmc/host/sh_mobile_sdhi.c | 5 +++++ drivers/mmc/host/tmio_mmc_pio.c | 7 ++++++- include/linux/mfd/tmio.h | 5 +++++ 3 files changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c index 4727586b063e..ebcfc659b799 100644 --- a/drivers/mmc/host/sh_mobile_sdhi.c +++ b/drivers/mmc/host/sh_mobile_sdhi.c @@ -230,6 +230,11 @@ static int sh_mobile_sdhi_probe(struct platform_device *pdev) */ mmc_data->flags |= TMIO_MMC_HAVE_CMD12_CTRL; + /* + * All SDHI need SDIO_INFO1 reserved bit + */ + mmc_data->flags |= TMIO_MMC_SDIO_STATUS_QUIRK; + if (of_id && of_id->data) { const struct sh_mobile_sdhi_of_data *of_data = of_id->data; mmc_data->flags |= of_data->tmio_flags; diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c index c2804827be9f..c5ffa92ccb18 100644 --- a/drivers/mmc/host/tmio_mmc_pio.c +++ b/drivers/mmc/host/tmio_mmc_pio.c @@ -665,6 +665,7 @@ irqreturn_t tmio_mmc_sdio_irq(int irq, void *devid) struct mmc_host *mmc = host->mmc; struct tmio_mmc_data *pdata = host->pdata; unsigned int ireg, status; + unsigned int sdio_status; if (!(pdata->flags & TMIO_MMC_SDIO_IRQ)) return IRQ_HANDLED; @@ -672,7 +673,11 @@ irqreturn_t tmio_mmc_sdio_irq(int irq, void *devid) status = sd_ctrl_read16(host, CTL_SDIO_STATUS); ireg = status & TMIO_SDIO_MASK_ALL & ~host->sdcard_irq_mask; - sd_ctrl_write16(host, CTL_SDIO_STATUS, status & ~TMIO_SDIO_MASK_ALL); + sdio_status = status & ~TMIO_SDIO_MASK_ALL; + if (pdata->flags & TMIO_MMC_SDIO_STATUS_QUIRK) + sdio_status |= 6; + + sd_ctrl_write16(host, CTL_SDIO_STATUS, sdio_status); if (mmc->caps & MMC_CAP_SDIO_IRQ && ireg & TMIO_SDIO_STAT_IOIRQ) mmc_signal_sdio_irq(mmc); diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 7432d95b08e2..a7493ae01b58 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -89,6 +89,11 @@ */ #define TMIO_MMC_HAVE_CMD12_CTRL (1 << 7) +/* + * Some controllers needs to set 1 on SDIO status reserved bits + */ +#define TMIO_MMC_SDIO_STATUS_QUIRK (1 << 8) + int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base); int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base); void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state); -- cgit v1.2.3 From e85dd04ea8c8d32ba8eae278959d28df34338e9d Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Sun, 24 Aug 2014 20:01:54 -0700 Subject: mmc: tmio: remove Renesas specific #ifdef This patch adds a new TMIO_MMC_HAVE_CTL_DMA_REG flag and removes the Renesas-specific #ifdef from the tmio driver. Tested-by: Nguyen Xuan Nui Tested-by: Hiep Cao Minh Signed-off-by: Kuninori Morimoto Signed-off-by: Ulf Hansson --- drivers/mmc/host/sh_mobile_sdhi.c | 5 +++++ drivers/mmc/host/tmio_mmc_dma.c | 6 ++---- include/linux/mfd/tmio.h | 5 +++++ 3 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c index ebcfc659b799..a077da02bb8e 100644 --- a/drivers/mmc/host/sh_mobile_sdhi.c +++ b/drivers/mmc/host/sh_mobile_sdhi.c @@ -235,6 +235,11 @@ static int sh_mobile_sdhi_probe(struct platform_device *pdev) */ mmc_data->flags |=
TMIO_MMC_SDIO_STATUS_QUIRK; + /* + * All SDHI have DMA control register + */ + mmc_data->flags |= TMIO_MMC_HAVE_CTL_DMA_REG; + if (of_id && of_id->data) { const struct sh_mobile_sdhi_of_data *of_data = of_id->data; mmc_data->flags |= of_data->tmio_flags; diff --git a/drivers/mmc/host/tmio_mmc_dma.c b/drivers/mmc/host/tmio_mmc_dma.c index bd646b041085..7d077388b9eb 100644 --- a/drivers/mmc/host/tmio_mmc_dma.c +++ b/drivers/mmc/host/tmio_mmc_dma.c @@ -28,10 +28,8 @@ void tmio_mmc_enable_dma(struct tmio_mmc_host *host, bool enable) if (!host->chan_tx || !host->chan_rx) return; -#if defined(CONFIG_SUPERH) || defined(CONFIG_ARCH_SHMOBILE) - /* Switch DMA mode on or off - SuperH specific? */ - sd_ctrl_write16(host, CTL_DMA_ENABLE, enable ? 2 : 0); -#endif + if (host->pdata->flags & TMIO_MMC_HAVE_CTL_DMA_REG) + sd_ctrl_write16(host, CTL_DMA_ENABLE, enable ? 2 : 0); } void tmio_mmc_abort_dma(struct tmio_mmc_host *host) diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index a7493ae01b58..adcb0cdb2fdb 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -94,6 +94,11 @@ */ #define TMIO_MMC_SDIO_STATUS_QUIRK (1 << 8) +/* + * Some controllers have DMA enable/disable register + */ +#define TMIO_MMC_HAVE_CTL_DMA_REG (1 << 9) + int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base); int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base); void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state); -- cgit v1.2.3 From da29fe2bf573f0ae56fdc2e790387cb73fc8c6f8 Mon Sep 17 00:00:00 2001 From: Shinobu Uehara Date: Sun, 24 Aug 2014 20:03:00 -0700 Subject: mmc: tmio: add actual clock support as option Some controllers support an actual clock setting on SD_CLK_CTRL :: DIV[7:0]. Renesas SH-Mobile SDHI doesn't support it, but Renesas R-Car SDHI does. This patch adds a new TMIO_MMC_CLK_ACTUAL flag for it.
[Kuninori Morimoto: tidied up for upstreaming] Tested-by: Nguyen Xuan Nui Tested-by: Hiep Cao Minh Signed-off-by: Shinobu Uehara Signed-off-by: Kuninori Morimoto Signed-off-by: Ulf Hansson --- drivers/mmc/host/sh_mobile_sdhi.c | 6 ++++-- drivers/mmc/host/tmio_mmc_pio.c | 5 +++++ include/linux/mfd/tmio.h | 5 +++++ 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sh_mobile_sdhi.c b/drivers/mmc/host/sh_mobile_sdhi.c index a077da02bb8e..d5d493719491 100644 --- a/drivers/mmc/host/sh_mobile_sdhi.c +++ b/drivers/mmc/host/sh_mobile_sdhi.c @@ -49,12 +49,14 @@ static const struct sh_mobile_sdhi_of_data sh_mobile_sdhi_of_cfg[] = { }; static const struct sh_mobile_sdhi_of_data of_rcar_gen1_compatible = { - .tmio_flags = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_WRPROTECT_DISABLE, + .tmio_flags = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_WRPROTECT_DISABLE | + TMIO_MMC_CLK_ACTUAL, .capabilities = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ, }; static const struct sh_mobile_sdhi_of_data of_rcar_gen2_compatible = { - .tmio_flags = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_WRPROTECT_DISABLE, + .tmio_flags = TMIO_MMC_HAS_IDLE_WAIT | TMIO_MMC_WRPROTECT_DISABLE | + TMIO_MMC_CLK_ACTUAL, .capabilities = MMC_CAP_SD_HIGHSPEED | MMC_CAP_SDIO_IRQ, .capabilities2 = MMC_CAP2_NO_MULTI_READ, .dma_rx_offset = 0x2000, }; diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c index 7cfe939992de..ba454131f9a8 100644 --- a/drivers/mmc/host/tmio_mmc_pio.c +++ b/drivers/mmc/host/tmio_mmc_pio.c @@ -159,6 +159,11 @@ static void tmio_mmc_set_clock(struct tmio_mmc_host *host, for (clock = host->mmc->f_min, clk = 0x80000080; new_clock >= (clock<<1); clk >>= 1) clock <<= 1; + + /* 1/1 clock is option */ + if ((host->pdata->flags & TMIO_MMC_CLK_ACTUAL) && + ((clk >> 22) & 0x1)) + clk |= 0xff; } if (host->set_clk_div) diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index adcb0cdb2fdb..90436d523e5e 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -99,6 +99,11 @@ */ #define TMIO_MMC_HAVE_CTL_DMA_REG (1 << 9) +/* + * Some controllers allows to set SDx actual clock + */ +#define TMIO_MMC_CLK_ACTUAL (1 << 10) + int tmio_core_mmc_enable(void __iomem *cnf, int shift, unsigned long base); int tmio_core_mmc_resume(void __iomem *cnf, int shift, unsigned long base); void tmio_core_mmc_pwr(void __iomem *cnf, int shift, int state); -- cgit v1.2.3 From 51da2240906cb94e8f6ba55e403b6206df6fb2dd Mon Sep 17 00:00:00 2001 From: Yuvaraj CD Date: Fri, 22 Aug 2014 19:17:50 +0530 Subject: mmc: dw_mmc: use mmc_regulator_get_supply to handle regulators This patch makes use of mmc_regulator_get_supply() to handle the vmmc and vqmmc regulators. It also moves the code handling these regulators to dw_mci_set_ios(). The vmmc and vqmmc regulators are turned on during MMC_POWER_UP and MMC_POWER_ON, and turned off during MMC_POWER_OFF.
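For reference, a condensed sketch of the pattern this patch adopts (error handling trimmed, so this is not the actual dw_mmc code):

/* probe: fetch vmmc/vqmmc once; absent supplies become ERR_PTR values */
ret = mmc_regulator_get_supply(mmc);
if (ret == -EPROBE_DEFER)
	return ret;

/* set_ios: power the supplies up and down together with the bus */
switch (ios->power_mode) {
case MMC_POWER_UP:
	if (!IS_ERR(mmc->supply.vmmc))
		ret = mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, ios->vdd);
	if (!IS_ERR(mmc->supply.vqmmc))
		ret = regulator_enable(mmc->supply.vqmmc);
	break;
case MMC_POWER_OFF:
	if (!IS_ERR(mmc->supply.vmmc))
		mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, 0);
	if (!IS_ERR(mmc->supply.vqmmc))
		regulator_disable(mmc->supply.vqmmc);
	break;
}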
Signed-off-by: Yuvaraj Kumar C D Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc.c | 72 ++++++++++++++++++++++------------------------ include/linux/mmc/dw_mmc.h | 2 +- 2 files changed, 36 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index 7f227e964265..aadb0d6aa63f 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -936,6 +936,7 @@ static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) struct dw_mci_slot *slot = mmc_priv(mmc); const struct dw_mci_drv_data *drv_data = slot->host->drv_data; u32 regs; + int ret; switch (ios->bus_width) { case MMC_BUS_WIDTH_4: @@ -974,12 +975,38 @@ static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) switch (ios->power_mode) { case MMC_POWER_UP: + if (!IS_ERR(mmc->supply.vmmc)) { + ret = mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, + ios->vdd); + if (ret) { + dev_err(slot->host->dev, + "failed to enable vmmc regulator\n"); + /*return, if failed turn on vmmc*/ + return; + } + } + if (!IS_ERR(mmc->supply.vqmmc) && !slot->host->vqmmc_enabled) { + ret = regulator_enable(mmc->supply.vqmmc); + if (ret < 0) + dev_err(slot->host->dev, + "failed to enable vqmmc regulator\n"); + else + slot->host->vqmmc_enabled = true; + } set_bit(DW_MMC_CARD_NEED_INIT, &slot->flags); regs = mci_readl(slot->host, PWREN); regs |= (1 << slot->id); mci_writel(slot->host, PWREN, regs); break; case MMC_POWER_OFF: + if (!IS_ERR(mmc->supply.vmmc)) + mmc_regulator_set_ocr(mmc, mmc->supply.vmmc, 0); + + if (!IS_ERR(mmc->supply.vqmmc) && slot->host->vqmmc_enabled) { + regulator_disable(mmc->supply.vqmmc); + slot->host->vqmmc_enabled = false; + } + regs = mci_readl(slot->host, PWREN); regs &= ~(1 << slot->id); mci_writel(slot->host, PWREN, regs); @@ -2110,7 +2137,13 @@ static int dw_mci_init_slot(struct dw_mci *host, unsigned int id) mmc->f_max = freq[1]; } - mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; + /*if there are external regulators, get them*/ + ret = mmc_regulator_get_supply(mmc); + if (ret == -EPROBE_DEFER) + goto err_setup_bus; + + if (!mmc->ocr_avail) + mmc->ocr_avail = MMC_VDD_32_33 | MMC_VDD_33_34; if (host->pdata->caps) mmc->caps = host->pdata->caps; @@ -2176,7 +2209,7 @@ static int dw_mci_init_slot(struct dw_mci *host, unsigned int id) err_setup_bus: mmc_free_host(mmc); - return -EINVAL; + return ret; } static void dw_mci_cleanup_slot(struct dw_mci_slot *slot, unsigned int id) @@ -2469,24 +2502,6 @@ int dw_mci_probe(struct dw_mci *host) } } - host->vmmc = devm_regulator_get_optional(host->dev, "vmmc"); - if (IS_ERR(host->vmmc)) { - ret = PTR_ERR(host->vmmc); - if (ret == -EPROBE_DEFER) - goto err_clk_ciu; - - dev_info(host->dev, "no vmmc regulator found: %d\n", ret); - host->vmmc = NULL; - } else { - ret = regulator_enable(host->vmmc); - if (ret) { - if (ret != -EPROBE_DEFER) - dev_err(host->dev, - "regulator_enable fail: %d\n", ret); - goto err_clk_ciu; - } - } - host->quirks = host->pdata->quirks; spin_lock_init(&host->lock); @@ -2630,8 +2645,6 @@ err_workqueue: err_dmaunmap: if (host->use_dma && host->dma_ops->exit) host->dma_ops->exit(host); - if (host->vmmc) - regulator_disable(host->vmmc); err_clk_ciu: if (!IS_ERR(host->ciu_clk)) @@ -2667,9 +2680,6 @@ void dw_mci_remove(struct dw_mci *host) if (host->use_dma && host->dma_ops->exit) host->dma_ops->exit(host); - if (host->vmmc) - regulator_disable(host->vmmc); - if (!IS_ERR(host->ciu_clk)) clk_disable_unprepare(host->ciu_clk); @@ -2686,9 +2696,6 @@ EXPORT_SYMBOL(dw_mci_remove); */ 
int dw_mci_suspend(struct dw_mci *host) { - if (host->vmmc) - regulator_disable(host->vmmc); - return 0; } EXPORT_SYMBOL(dw_mci_suspend); @@ -2697,15 +2704,6 @@ int dw_mci_resume(struct dw_mci *host) { int i, ret; - if (host->vmmc) { - ret = regulator_enable(host->vmmc); - if (ret) { - dev_err(host->dev, - "failed to enable regulator: %d\n", ret); - return ret; - } - } - if (!dw_mci_ctrl_reset(host, SDMMC_CTRL_ALL_RESET_FLAGS)) { ret = -ENODEV; return ret; diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h index 29ce014ab421..84e2827d0f0b 100644 --- a/include/linux/mmc/dw_mmc.h +++ b/include/linux/mmc/dw_mmc.h @@ -188,7 +188,7 @@ struct dw_mci { /* Workaround flags */ u32 quirks; - struct regulator *vmmc; /* Power regulator */ + bool vqmmc_enabled; unsigned long irq_flags; /* IRQ flags */ int irq; }; -- cgit v1.2.3 From 0173055842cd1d9ed3984e70891c22dbf2f29372 Mon Sep 17 00:00:00 2001 From: Doug Anderson Date: Fri, 22 Aug 2014 19:17:51 +0530 Subject: mmc: dw_mmc: Support voltage changes For UHS cards we need the ability to switch voltages from 3.3V to 1.8V. Add support to the dw_mmc driver to handle this. Note that dw_mmc needs a little bit of extra code since the interface needs a special bit programmed to the CMD register while CMD11 is progressing. This means adding a few extra states to the state machine to track. Signed-off-by: Doug Anderson Signed-off-by: Yuvaraj Kumar C D Signed-off-by: Ulf Hansson --- drivers/mmc/host/dw_mmc.c | 137 ++++++++++++++++++++++++++++++++++++++++++--- drivers/mmc/host/dw_mmc.h | 5 +- include/linux/mmc/dw_mmc.h | 2 + 3 files changed, 134 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/dw_mmc.c b/drivers/mmc/host/dw_mmc.c index aadb0d6aa63f..23719249182b 100644 --- a/drivers/mmc/host/dw_mmc.c +++ b/drivers/mmc/host/dw_mmc.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -234,10 +235,13 @@ err: } #endif /* defined(CONFIG_DEBUG_FS) */ +static void mci_send_cmd(struct dw_mci_slot *slot, u32 cmd, u32 arg); + static u32 dw_mci_prepare_command(struct mmc_host *mmc, struct mmc_command *cmd) { struct mmc_data *data; struct dw_mci_slot *slot = mmc_priv(mmc); + struct dw_mci *host = slot->host; const struct dw_mci_drv_data *drv_data = slot->host->drv_data; u32 cmdr; cmd->error = -EINPROGRESS; @@ -253,6 +257,34 @@ static u32 dw_mci_prepare_command(struct mmc_host *mmc, struct mmc_command *cmd) else if (cmd->opcode != MMC_SEND_STATUS && cmd->data) cmdr |= SDMMC_CMD_PRV_DAT_WAIT; + if (cmd->opcode == SD_SWITCH_VOLTAGE) { + u32 clk_en_a; + + /* Special bit makes CMD11 not die */ + cmdr |= SDMMC_CMD_VOLT_SWITCH; + + /* Change state to continue to handle CMD11 weirdness */ + WARN_ON(slot->host->state != STATE_SENDING_CMD); + slot->host->state = STATE_SENDING_CMD11; + + /* + * We need to disable low power mode (automatic clock stop) + * while doing voltage switch so we don't confuse the card, + * since stopping the clock is a specific part of the UHS + * voltage change dance. + * + * Note that low power mode (SDMMC_CLKEN_LOW_PWR) will be + * unconditionally turned back on in dw_mci_setup_bus() if it's + * ever called with a non-zero clock. That shouldn't happen + * until the voltage change is all done. 
+ */ + clk_en_a = mci_readl(host, CLKENA); + clk_en_a &= ~(SDMMC_CLKEN_LOW_PWR << slot->id); + mci_writel(host, CLKENA, clk_en_a); + mci_send_cmd(slot, SDMMC_CMD_UPD_CLK | + SDMMC_CMD_PRV_DAT_WAIT, 0); + } + if (cmd->flags & MMC_RSP_PRESENT) { /* We expect a response, so set this bit */ cmdr |= SDMMC_CMD_RESP_EXP; @@ -775,11 +807,15 @@ static void dw_mci_setup_bus(struct dw_mci_slot *slot, bool force_clkinit) unsigned int clock = slot->clock; u32 div; u32 clk_en_a; + u32 sdmmc_cmd_bits = SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT; + + /* We must continue to set bit 28 in CMD until the change is complete */ + if (host->state == STATE_WAITING_CMD11_DONE) + sdmmc_cmd_bits |= SDMMC_CMD_VOLT_SWITCH; if (!clock) { mci_writel(host, CLKENA, 0); - mci_send_cmd(slot, - SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT, 0); + mci_send_cmd(slot, sdmmc_cmd_bits, 0); } else if (clock != host->current_speed || force_clkinit) { div = host->bus_hz / clock; if (host->bus_hz % clock && host->bus_hz > clock) @@ -803,15 +839,13 @@ static void dw_mci_setup_bus(struct dw_mci_slot *slot, bool force_clkinit) mci_writel(host, CLKSRC, 0); /* inform CIU */ - mci_send_cmd(slot, - SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT, 0); + mci_send_cmd(slot, sdmmc_cmd_bits, 0); /* set clock to desired speed */ mci_writel(host, CLKDIV, div); /* inform CIU */ - mci_send_cmd(slot, - SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT, 0); + mci_send_cmd(slot, sdmmc_cmd_bits, 0); /* enable clock; only low power if no SDIO */ clk_en_a = SDMMC_CLKEN_ENABLE << slot->id; @@ -820,8 +854,7 @@ static void dw_mci_setup_bus(struct dw_mci_slot *slot, bool force_clkinit) mci_writel(host, CLKENA, clk_en_a); /* inform CIU */ - mci_send_cmd(slot, - SDMMC_CMD_UPD_CLK | SDMMC_CMD_PRV_DAT_WAIT, 0); + mci_send_cmd(slot, sdmmc_cmd_bits, 0); /* keep the clock with reflecting clock dividor */ slot->__clk_old = clock << div; @@ -897,6 +930,17 @@ static void dw_mci_queue_request(struct dw_mci *host, struct dw_mci_slot *slot, slot->mrq = mrq; + if (host->state == STATE_WAITING_CMD11_DONE) { + dev_warn(&slot->mmc->class_dev, + "Voltage change didn't complete\n"); + /* + * this case isn't expected to happen, so we can + * either crash here or just try to continue on + * in the closest possible state + */ + host->state = STATE_IDLE; + } + if (host->state == STATE_IDLE) { host->state = STATE_SENDING_CMD; dw_mci_start_request(host, slot); @@ -973,6 +1017,9 @@ static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) /* Slot specific timing and width adjustment */ dw_mci_setup_bus(slot, false); + if (slot->host->state == STATE_WAITING_CMD11_DONE && ios->clock != 0) + slot->host->state = STATE_IDLE; + switch (ios->power_mode) { case MMC_POWER_UP: if (!IS_ERR(mmc->supply.vmmc)) { @@ -1016,6 +1063,59 @@ static void dw_mci_set_ios(struct mmc_host *mmc, struct mmc_ios *ios) } } +static int dw_mci_card_busy(struct mmc_host *mmc) +{ + struct dw_mci_slot *slot = mmc_priv(mmc); + u32 status; + + /* + * Check the busy bit which is low when DAT[3:0] + * (the data lines) are 0000 + */ + status = mci_readl(slot->host, STATUS); + + return !!(status & SDMMC_STATUS_BUSY); +} + +static int dw_mci_switch_voltage(struct mmc_host *mmc, struct mmc_ios *ios) +{ + struct dw_mci_slot *slot = mmc_priv(mmc); + struct dw_mci *host = slot->host; + u32 uhs; + u32 v18 = SDMMC_UHS_18V << slot->id; + int min_uv, max_uv; + int ret; + + /* + * Program the voltage. Note that some instances of dw_mmc may use + * the UHS_REG for this. 
For other instances (like exynos) the UHS_REG + * does no harm but you need to set the regulator directly. Try both. + */ + uhs = mci_readl(host, UHS_REG); + if (ios->signal_voltage == MMC_SIGNAL_VOLTAGE_330) { + min_uv = 2700000; + max_uv = 3600000; + uhs &= ~v18; + } else { + min_uv = 1700000; + max_uv = 1950000; + uhs |= v18; + } + if (!IS_ERR(mmc->supply.vqmmc)) { + ret = regulator_set_voltage(mmc->supply.vqmmc, min_uv, max_uv); + + if (ret) { + dev_err(&mmc->class_dev, + "Regulator set error %d: %d - %d\n", + ret, min_uv, max_uv); + return ret; + } + } + mci_writel(host, UHS_REG, uhs); + + return 0; +} + static int dw_mci_get_ro(struct mmc_host *mmc) { int read_only; @@ -1158,6 +1258,9 @@ static const struct mmc_host_ops dw_mci_ops = { .get_cd = dw_mci_get_cd, .enable_sdio_irq = dw_mci_enable_sdio_irq, .execute_tuning = dw_mci_execute_tuning, + .card_busy = dw_mci_card_busy, + .start_signal_voltage_switch = dw_mci_switch_voltage, + }; static void dw_mci_request_end(struct dw_mci *host, struct mmc_request *mrq) @@ -1181,7 +1284,11 @@ static void dw_mci_request_end(struct dw_mci *host, struct mmc_request *mrq) dw_mci_start_request(host, slot); } else { dev_vdbg(host->dev, "list empty\n"); - host->state = STATE_IDLE; + + if (host->state == STATE_SENDING_CMD11) + host->state = STATE_WAITING_CMD11_DONE; + else + host->state = STATE_IDLE; } spin_unlock(&host->lock); @@ -1292,8 +1399,10 @@ static void dw_mci_tasklet_func(unsigned long priv) switch (state) { case STATE_IDLE: + case STATE_WAITING_CMD11_DONE: break; + case STATE_SENDING_CMD11: case STATE_SENDING_CMD: if (!test_and_clear_bit(EVENT_CMD_COMPLETE, &host->pending_events)) @@ -1894,6 +2003,14 @@ static irqreturn_t dw_mci_interrupt(int irq, void *dev_id) } if (pending) { + /* Check volt switch first, since it can look like an error */ + if ((host->state == STATE_SENDING_CMD11) && + (pending & SDMMC_INT_VOLT_SWITCH)) { + mci_writel(host, RINTSTS, SDMMC_INT_VOLT_SWITCH); + pending &= ~SDMMC_INT_VOLT_SWITCH; + dw_mci_cmd_interrupt(host, pending); + } + if (pending & DW_MCI_CMD_ERROR_FLAGS) { mci_writel(host, RINTSTS, DW_MCI_CMD_ERROR_FLAGS); host->cmd_status = pending; @@ -1999,7 +2116,9 @@ static void dw_mci_work_routine_card(struct work_struct *work) switch (host->state) { case STATE_IDLE: + case STATE_WAITING_CMD11_DONE: break; + case STATE_SENDING_CMD11: case STATE_SENDING_CMD: mrq->cmd->error = -ENOMEDIUM; if (!mrq->data) diff --git a/drivers/mmc/host/dw_mmc.h b/drivers/mmc/host/dw_mmc.h index 08fd956d81f3..01b99e8a9190 100644 --- a/drivers/mmc/host/dw_mmc.h +++ b/drivers/mmc/host/dw_mmc.h @@ -99,6 +99,7 @@ #define SDMMC_INT_HLE BIT(12) #define SDMMC_INT_FRUN BIT(11) #define SDMMC_INT_HTO BIT(10) +#define SDMMC_INT_VOLT_SWITCH BIT(10) /* overloads bit 10! 
*/ #define SDMMC_INT_DRTO BIT(9) #define SDMMC_INT_RTO BIT(8) #define SDMMC_INT_DCRC BIT(7) @@ -113,6 +114,7 @@ /* Command register defines */ #define SDMMC_CMD_START BIT(31) #define SDMMC_CMD_USE_HOLD_REG BIT(29) +#define SDMMC_CMD_VOLT_SWITCH BIT(28) #define SDMMC_CMD_CCS_EXP BIT(23) #define SDMMC_CMD_CEATA_RD BIT(22) #define SDMMC_CMD_UPD_CLK BIT(21) @@ -130,6 +132,7 @@ /* Status register defines */ #define SDMMC_GET_FCNT(x) (((x)>>17) & 0x1FFF) #define SDMMC_STATUS_DMA_REQ BIT(31) +#define SDMMC_STATUS_BUSY BIT(9) /* FIFOTH register defines */ #define SDMMC_SET_FIFOTH(m, r, t) (((m) & 0x7) << 28 | \ ((r) & 0xFFF) << 16 | \ @@ -150,7 +153,7 @@ #define SDMMC_GET_VERID(x) ((x) & 0xFFFF) /* Card read threshold */ #define SDMMC_SET_RD_THLD(v, x) (((v) & 0x1FFF) << 16 | (x)) - +#define SDMMC_UHS_18V BIT(0) /* All ctrl reset bits */ #define SDMMC_CTRL_ALL_RESET_FLAGS \ (SDMMC_CTRL_RESET | SDMMC_CTRL_FIFO_RESET | SDMMC_CTRL_DMA_RESET) diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h index 84e2827d0f0b..001366927cf4 100644 --- a/include/linux/mmc/dw_mmc.h +++ b/include/linux/mmc/dw_mmc.h @@ -26,6 +26,8 @@ enum dw_mci_state { STATE_DATA_BUSY, STATE_SENDING_STOP, STATE_DATA_ERROR, + STATE_SENDING_CMD11, + STATE_WAITING_CMD11_DONE, }; enum { -- cgit v1.2.3 From e99783a45220a2c5f5a598e0e81213ecf2dbcf2f Mon Sep 17 00:00:00 2001 From: Chanho Min Date: Sat, 30 Aug 2014 12:40:40 +0900 Subject: mmc: sdhci: handle busy-end interrupt during command It is fully legal for a controller to start handling the busy-end interrupt before it has signaled that the command has completed. So make sure we do things in the proper order, or else the command interrupt is ignored, which can cause unexpected operations. This was found on some Toshiba eMMC devices with the warning below: "mmc0: Got command interrupt 0x00000001 even though no command operation was in progress." This issue has also been reported by Youssef TRIKI: it is not specific to Toshiba devices, and happens with eMMC devices as well as SD cards which support Auto-CMD12 rather than CMD23. Also, a similar patch was submitted by: Gwendal Grignou Changes since v1: Fixed a conflict with the next branch of git.linaro.org/people/ulf.hansson/mmc.git and tested that the issue is fixed again.
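In pseudocode, the ordering fix amounts to a small handshake (simplified from the patch below; busy_handle is the new flag):

/* command-complete IRQ: if busy-end hasn't fired yet, wait for it */
if (!host->busy_handle) {
	host->busy_handle = 1;	/* busy-end IRQ will finish the command */
	return;
}
sdhci_finish_command(host);

/* busy-end (DATA_END) IRQ: finish only if command-complete came first */
if (host->busy_handle)
	sdhci_finish_command(host);
else
	host->busy_handle = 1;	/* command-complete IRQ will finish it */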
Signed-off-by: Hankyung Yu Signed-off-by: Chanho Min Tested-by: Youssef TRIKI Signed-off-by: Ulf Hansson --- drivers/mmc/host/sdhci.c | 17 +++++++++++++++-- include/linux/mmc/sdhci.h | 1 + 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/host/sdhci.c b/drivers/mmc/host/sdhci.c index b5e22b8c4885..335cdf3b5224 100644 --- a/drivers/mmc/host/sdhci.c +++ b/drivers/mmc/host/sdhci.c @@ -1016,6 +1016,7 @@ void sdhci_send_command(struct sdhci_host *host, struct mmc_command *cmd) mod_timer(&host->timer, timeout); host->cmd = cmd; + host->busy_handle = 0; sdhci_prepare_data(host, cmd); @@ -2261,8 +2262,12 @@ static void sdhci_cmd_irq(struct sdhci_host *host, u32 intmask) if (host->cmd->data) DBG("Cannot wait for busy signal when also " "doing a data transfer"); - else if (!(host->quirks & SDHCI_QUIRK_NO_BUSY_IRQ)) + else if (!(host->quirks & SDHCI_QUIRK_NO_BUSY_IRQ) + && !host->busy_handle) { + /* Mark that command complete before busy is ended */ + host->busy_handle = 1; return; + } /* The controller does not support the end-of-busy IRQ, * fall through and take the SDHCI_INT_RESPONSE */ @@ -2330,7 +2335,15 @@ static void sdhci_data_irq(struct sdhci_host *host, u32 intmask) return; } if (intmask & SDHCI_INT_DATA_END) { - sdhci_finish_command(host); + /* + * Some cards handle busy-end interrupt + * before the command completed, so make + * sure we do things in the proper order. + */ + if (host->busy_handle) + sdhci_finish_command(host); + else + host->busy_handle = 1; return; } } diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index 09ebe57d5ce9..0aa85ca0c788 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -146,6 +146,7 @@ struct sdhci_host { struct mmc_command *cmd; /* Current command */ struct mmc_data *data; /* Current data request */ unsigned int data_early:1; /* Data finished before cmd */ + unsigned int busy_handle:1; /* Handling the order of Busy-end */ struct sg_mapping_iter sg_miter; /* SG state for PIO */ unsigned int blocks; /* remaining PIO blocks */ -- cgit v1.2.3 From 2e47e84245adcb1b3872210678b6146f674fb3ff Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Tue, 2 Sep 2014 19:08:53 -0700 Subject: mmc: Add .multi_io_quirk callback for multi I/O HW bug Historically, we have been using MMC_CAP* to handle host HW issues and currently the block layer uses MMC_CAP2_NO_MULTI_READ flag for a multi I/O HW bug workaround. There are a few tweaks needed to make MMC_CAP2_NO_MULTI_READ suit all situations. Therefore let's add an optional host ops callback to enable host drivers to return the number of blocks they allow per request. In a future patch, when host drivers have converted to the new callback, MMC_CAP2_NO_MULTI_READ shall be removed.
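For illustration, a host driver with such a HW bug could implement the new callback like this (a hypothetical driver; the bug and the numbers are made up):

/* hypothetical controller: multi-block reads are unreliable */
static int example_multi_io_quirk(struct mmc_card *card,
				  unsigned int direction, int blk_size)
{
	if (direction == MMC_DATA_READ)
		return 1;	/* force single-block reads */

	return blk_size;	/* writes are fine as-is */
}

static const struct mmc_host_ops example_ops = {
	/* ... other callbacks ... */
	.multi_io_quirk	= example_multi_io_quirk,
};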
Signed-off-by: Kuninori Morimoto Signed-off-by: Ulf Hansson --- drivers/mmc/card/block.c | 10 ++++++++++ include/linux/mmc/host.h | 7 +++++++ 2 files changed, 17 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index ede41f05c392..adab9038f6f7 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -1402,6 +1402,16 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq, if (card->host->caps2 & MMC_CAP2_NO_MULTI_READ && rq_data_dir(req) == READ) brq->data.blocks = 1; + + /* + * Some controllers have HW issues while operating + * in multiple I/O mode + */ + if (card->host->ops->multi_io_quirk) + brq->data.blocks = card->host->ops->multi_io_quirk(card, + (rq_data_dir(req) == READ) ? + MMC_DATA_READ : MMC_DATA_WRITE, + brq->data.blocks); } if (brq->data.blocks > 1 || do_rel_wr) { diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 4cbf61476999..10e2bd6985ae 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -139,6 +139,13 @@ struct mmc_host_ops { int (*select_drive_strength)(unsigned int max_dtr, int host_drv, int card_drv); void (*hw_reset)(struct mmc_host *host); void (*card_event)(struct mmc_host *host); + + /* + * Optional callback to support controllers with HW issues for multiple + * I/O. Returns the number of supported blocks for the request. + */ + int (*multi_io_quirk)(struct mmc_card *card, + unsigned int direction, int blk_size); }; struct mmc_card; -- cgit v1.2.3 From bbf0208d39121bd8873b032459cb2b5f35e14593 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 8 Sep 2014 23:45:25 -0700 Subject: mmc: use .multi_io_quirk on tmio_mmc Now tmio_mmc can use the .multi_io_quirk callback instead of the MMC_CAP2_NO_MULTI_READ flag, so let's use it.
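For illustration, board code could route such a quirk through the new tmio platform-data hook like this (a made-up example, not taken from any board file):

/* hypothetical board: cap multi-block reads, leave writes alone */
static int board_multi_io_quirk(struct mmc_card *card,
				unsigned int direction, int blk_size)
{
	return (direction == MMC_DATA_READ) ? 1 : blk_size;
}

static struct tmio_mmc_data board_tmio_data = {
	.multi_io_quirk	= board_multi_io_quirk,
};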
Signed-off-by: Kuninori Morimoto Acked-by: Lee Jones Signed-off-by: Ulf Hansson --- drivers/mmc/host/tmio_mmc_pio.c | 13 +++++++++++++ include/linux/mfd/tmio.h | 3 +++ 2 files changed, 16 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/host/tmio_mmc_pio.c b/drivers/mmc/host/tmio_mmc_pio.c index ba454131f9a8..ff5ff0f725c9 100644 --- a/drivers/mmc/host/tmio_mmc_pio.c +++ b/drivers/mmc/host/tmio_mmc_pio.c @@ -970,12 +970,25 @@ static int tmio_mmc_get_ro(struct mmc_host *mmc) return ret; } +static int tmio_multi_io_quirk(struct mmc_card *card, + unsigned int direction, int blk_size) +{ + struct tmio_mmc_host *host = mmc_priv(card->host); + struct tmio_mmc_data *pdata = host->pdata; + + if (pdata->multi_io_quirk) + return pdata->multi_io_quirk(card, direction, blk_size); + + return blk_size; +} + static const struct mmc_host_ops tmio_mmc_ops = { .request = tmio_mmc_request, .set_ios = tmio_mmc_set_ios, .get_ro = tmio_mmc_get_ro, .get_cd = mmc_gpio_get_cd, .enable_sdio_irq = tmio_mmc_enable_sdio_irq, + .multi_io_quirk = tmio_multi_io_quirk, }; static int tmio_mmc_init_ocr(struct tmio_mmc_host *host) diff --git a/include/linux/mfd/tmio.h b/include/linux/mfd/tmio.h index 90436d523e5e..57388171610d 100644 --- a/include/linux/mfd/tmio.h +++ b/include/linux/mfd/tmio.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -142,6 +143,8 @@ struct tmio_mmc_data { /* clock management callbacks */ int (*clk_enable)(struct platform_device *pdev, unsigned int *f); void (*clk_disable)(struct platform_device *pdev); + int (*multi_io_quirk)(struct mmc_card *card, + unsigned int direction, int blk_size); }; /* -- cgit v1.2.3 From 0abb71feb228ddbd17e0dfa13216541e036bb549 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 8 Sep 2014 23:46:49 -0700 Subject: mmc: remove MMC_CAP2_NO_MULTI_READ flags Now, mmc framework uses multi_io_quirk for I/O HW bug workaround. 
MMC_CAP2_NO_MULTI_READ flag is no longer needed Signed-off-by: Kuninori Morimoto Signed-off-by: Ulf Hansson --- drivers/mmc/card/block.c | 5 ----- include/linux/mmc/host.h | 1 - 2 files changed, 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/mmc/card/block.c b/drivers/mmc/card/block.c index adab9038f6f7..413f984cb76d 100644 --- a/drivers/mmc/card/block.c +++ b/drivers/mmc/card/block.c @@ -1398,11 +1398,6 @@ static void mmc_blk_rw_rq_prep(struct mmc_queue_req *mqrq, if (disable_multi) brq->data.blocks = 1; - /* Some controllers can't do multiblock reads due to hw bugs */ - if (card->host->caps2 & MMC_CAP2_NO_MULTI_READ && - rq_data_dir(req) == READ) - brq->data.blocks = 1; - /* * Some controllers have HW issues while operating * in multiple I/O mode diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 10e2bd6985ae..797ae657dc3d 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -272,7 +272,6 @@ struct mmc_host { #define MMC_CAP2_BOOTPART_NOACC (1 << 0) /* Boot partition no access */ #define MMC_CAP2_FULL_PWR_CYCLE (1 << 2) /* Can do full power cycle */ -#define MMC_CAP2_NO_MULTI_READ (1 << 3) /* Multiblock reads don't work */ #define MMC_CAP2_HS200_1_8V_SDR (1 << 5) /* can support */ #define MMC_CAP2_HS200_1_2V_SDR (1 << 6) /* can support */ #define MMC_CAP2_HS200 (MMC_CAP2_HS200_1_8V_SDR | \ -- cgit v1.2.3 From 9d2fa2428ae149ba3a5b7a4ceb0a9e11f1882b3b Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 27 Aug 2014 13:00:51 +0200 Subject: mmc: slot-gpio: add gpiod variant to get wp GPIO This makes it possible to get the write protect (read only) GPIO line from a GPIO descriptor. Written to exactly mirror the card detect function. Acked-by: Alexandre Courbot Signed-off-by: Linus Walleij Signed-off-by: Ulf Hansson --- drivers/mmc/core/slot-gpio.c | 48 +++++++++++++++++++++++++++++++++++++++++++ include/linux/mmc/slot-gpio.h | 3 +++ 2 files changed, 51 insertions(+) (limited to 'include/linux') diff --git a/drivers/mmc/core/slot-gpio.c b/drivers/mmc/core/slot-gpio.c index 908c2b29e79f..e3fce4493fab 100644 --- a/drivers/mmc/core/slot-gpio.c +++ b/drivers/mmc/core/slot-gpio.c @@ -325,6 +325,54 @@ int mmc_gpiod_request_cd(struct mmc_host *host, const char *con_id, } EXPORT_SYMBOL(mmc_gpiod_request_cd); +/** + * mmc_gpiod_request_ro - request a gpio descriptor for write protection + * @host: mmc host + * @con_id: function within the GPIO consumer + * @idx: index of the GPIO to obtain in the consumer + * @override_active_level: ignore %GPIO_ACTIVE_LOW flag + * @debounce: debounce time in microseconds + * + * Use this function in place of mmc_gpio_request_ro() to use the GPIO + * descriptor API. Note that it is paired with mmc_gpiod_free_ro() not + * mmc_gpio_free_ro(). + * + * Returns zero on success, else an error. 
+ */ +int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id, + unsigned int idx, bool override_active_level, + unsigned int debounce) +{ + struct mmc_gpio *ctx; + struct gpio_desc *desc; + int ret; + + ret = mmc_gpio_alloc(host); + if (ret < 0) + return ret; + + ctx = host->slot.handler_priv; + + if (!con_id) + con_id = ctx->ro_label; + + desc = devm_gpiod_get_index(host->parent, con_id, idx, GPIOD_IN); + if (IS_ERR(desc)) + return PTR_ERR(desc); + + if (debounce) { + ret = gpiod_set_debounce(desc, debounce); + if (ret < 0) + return ret; + } + + ctx->override_ro_active_level = override_active_level; + ctx->ro_gpio = desc; + + return 0; +} +EXPORT_SYMBOL(mmc_gpiod_request_ro); + /** * mmc_gpiod_free_cd - free the card-detection gpio descriptor * @host: mmc host diff --git a/include/linux/mmc/slot-gpio.h b/include/linux/mmc/slot-gpio.h index d2433381e828..a0d0442c15bf 100644 --- a/include/linux/mmc/slot-gpio.h +++ b/include/linux/mmc/slot-gpio.h @@ -25,6 +25,9 @@ void mmc_gpio_free_cd(struct mmc_host *host); int mmc_gpiod_request_cd(struct mmc_host *host, const char *con_id, unsigned int idx, bool override_active_level, unsigned int debounce); +int mmc_gpiod_request_ro(struct mmc_host *host, const char *con_id, + unsigned int idx, bool override_active_level, + unsigned int debounce); void mmc_gpiod_free_cd(struct mmc_host *host); void mmc_gpiod_request_cd_irq(struct mmc_host *host); -- cgit v1.2.3 From 3034a146820c26fe6da66a45f6340fe87fe0983a Mon Sep 17 00:00:00 2001 From: Dmitry Kasatkin Date: Fri, 27 Jun 2014 18:15:44 +0300 Subject: ima: pass 'opened' flag to identify newly created files Empty files and missing xattrs do not guarantee that a file was just created. This patch passes FILE_CREATED flag to IMA to reliably identify new files. 
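In sketch form, the reliable test is the FILE_CREATED bit rather than the file size (hypothetical helper, for illustration only):

static bool ima_file_is_new(int opened)
{
        /*
         * The old heuristic, inode->i_size == 0, also matches files that
         * existed before this open but happen to be empty.  FILE_CREATED
         * is set in 'opened' by the VFS open path only when this open
         * actually created the file, so it cannot misfire.
         */
        return opened & FILE_CREATED;
}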
Signed-off-by: Dmitry Kasatkin Signed-off-by: Mimi Zohar Cc: 3.14+ --- fs/namei.c | 2 +- fs/nfsd/vfs.c | 2 +- include/linux/ima.h | 4 ++-- security/integrity/ima/ima.h | 4 ++-- security/integrity/ima/ima_appraise.c | 4 ++-- security/integrity/ima/ima_main.c | 16 ++++++++-------- 6 files changed, 16 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/fs/namei.c b/fs/namei.c index 985c6f368485..005771f97189 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -3058,7 +3058,7 @@ opened: error = open_check_o_direct(file); if (error) goto exit_fput; - error = ima_file_check(file, op->acc_mode); + error = ima_file_check(file, op->acc_mode, *opened); if (error) goto exit_fput; diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c index 140c496f612c..d49c778faecb 100644 --- a/fs/nfsd/vfs.c +++ b/fs/nfsd/vfs.c @@ -709,7 +709,7 @@ nfsd_open(struct svc_rqst *rqstp, struct svc_fh *fhp, umode_t type, host_err = PTR_ERR(*filp); *filp = NULL; } else { - host_err = ima_file_check(*filp, may_flags); + host_err = ima_file_check(*filp, may_flags, 0); if (may_flags & NFSD_MAY_64BIT_COOKIE) (*filp)->f_mode |= FMODE_64BITHASH; diff --git a/include/linux/ima.h b/include/linux/ima.h index 7cf5e9b32550..120ccc53fcb7 100644 --- a/include/linux/ima.h +++ b/include/linux/ima.h @@ -15,7 +15,7 @@ struct linux_binprm; #ifdef CONFIG_IMA extern int ima_bprm_check(struct linux_binprm *bprm); -extern int ima_file_check(struct file *file, int mask); +extern int ima_file_check(struct file *file, int mask, int opened); extern void ima_file_free(struct file *file); extern int ima_file_mmap(struct file *file, unsigned long prot); extern int ima_module_check(struct file *file); @@ -27,7 +27,7 @@ static inline int ima_bprm_check(struct linux_binprm *bprm) return 0; } -static inline int ima_file_check(struct file *file, int mask) +static inline int ima_file_check(struct file *file, int mask, int opened) { return 0; } diff --git a/security/integrity/ima/ima.h b/security/integrity/ima/ima.h index 57da4bd7ba0c..0fb456c20eda 100644 --- a/security/integrity/ima/ima.h +++ b/security/integrity/ima/ima.h @@ -177,7 +177,7 @@ void ima_delete_rules(void); int ima_appraise_measurement(int func, struct integrity_iint_cache *iint, struct file *file, const unsigned char *filename, struct evm_ima_xattr_data *xattr_value, - int xattr_len); + int xattr_len, int opened); int ima_must_appraise(struct inode *inode, int mask, enum ima_hooks func); void ima_update_xattr(struct integrity_iint_cache *iint, struct file *file); enum integrity_status ima_get_cache_status(struct integrity_iint_cache *iint, @@ -193,7 +193,7 @@ static inline int ima_appraise_measurement(int func, struct file *file, const unsigned char *filename, struct evm_ima_xattr_data *xattr_value, - int xattr_len) + int xattr_len, int opened) { return INTEGRITY_UNKNOWN; } diff --git a/security/integrity/ima/ima_appraise.c b/security/integrity/ima/ima_appraise.c index a4605d677248..225fd944a4ef 100644 --- a/security/integrity/ima/ima_appraise.c +++ b/security/integrity/ima/ima_appraise.c @@ -183,7 +183,7 @@ int ima_read_xattr(struct dentry *dentry, int ima_appraise_measurement(int func, struct integrity_iint_cache *iint, struct file *file, const unsigned char *filename, struct evm_ima_xattr_data *xattr_value, - int xattr_len) + int xattr_len, int opened) { static const char op[] = "appraise_data"; char *cause = "unknown"; @@ -203,7 +203,7 @@ int ima_appraise_measurement(int func, struct integrity_iint_cache *iint, cause = "missing-hash"; status = INTEGRITY_NOLABEL; - if (inode->i_size == 0) 
{ + if (opened & FILE_CREATED) { iint->flags |= IMA_NEW_FILE; status = INTEGRITY_PASS; } diff --git a/security/integrity/ima/ima_main.c b/security/integrity/ima/ima_main.c index 0a2298f90c9c..f82cf9b8e92b 100644 --- a/security/integrity/ima/ima_main.c +++ b/security/integrity/ima/ima_main.c @@ -157,7 +157,7 @@ void ima_file_free(struct file *file) } static int process_measurement(struct file *file, const char *filename, - int mask, int function) + int mask, int function, int opened) { struct inode *inode = file_inode(file); struct integrity_iint_cache *iint; @@ -226,7 +226,7 @@ static int process_measurement(struct file *file, const char *filename, xattr_value, xattr_len); if (action & IMA_APPRAISE_SUBMASK) rc = ima_appraise_measurement(_func, iint, file, pathname, - xattr_value, xattr_len); + xattr_value, xattr_len, opened); if (action & IMA_AUDIT) ima_audit_measurement(iint, pathname); kfree(pathbuf); @@ -255,7 +255,7 @@ out: int ima_file_mmap(struct file *file, unsigned long prot) { if (file && (prot & PROT_EXEC)) - return process_measurement(file, NULL, MAY_EXEC, MMAP_CHECK); + return process_measurement(file, NULL, MAY_EXEC, MMAP_CHECK, 0); return 0; } @@ -277,7 +277,7 @@ int ima_bprm_check(struct linux_binprm *bprm) return process_measurement(bprm->file, (strcmp(bprm->filename, bprm->interp) == 0) ? bprm->filename : bprm->interp, - MAY_EXEC, BPRM_CHECK); + MAY_EXEC, BPRM_CHECK, 0); } /** @@ -290,12 +290,12 @@ int ima_bprm_check(struct linux_binprm *bprm) * On success return 0. On integrity appraisal error, assuming the file * is in policy and IMA-appraisal is in enforcing mode, return -EACCES. */ -int ima_file_check(struct file *file, int mask) +int ima_file_check(struct file *file, int mask, int opened) { ima_rdwr_violation_check(file); return process_measurement(file, NULL, mask & (MAY_READ | MAY_WRITE | MAY_EXEC), - FILE_CHECK); + FILE_CHECK, opened); } EXPORT_SYMBOL_GPL(ima_file_check); @@ -318,7 +318,7 @@ int ima_module_check(struct file *file) #endif return 0; /* We rely on module signature checking */ } - return process_measurement(file, NULL, MAY_EXEC, MODULE_CHECK); + return process_measurement(file, NULL, MAY_EXEC, MODULE_CHECK, 0); } int ima_fw_from_file(struct file *file, char *buf, size_t size) @@ -329,7 +329,7 @@ int ima_fw_from_file(struct file *file, char *buf, size_t size) return -EACCES; /* INTEGRITY_UNKNOWN */ return 0; } - return process_measurement(file, NULL, MAY_EXEC, FIRMWARE_CHECK); + return process_measurement(file, NULL, MAY_EXEC, FIRMWARE_CHECK, 0); } static int __init init_ima(void) -- cgit v1.2.3 From ef979a26e3d521d51dbd9950e46a69e303073171 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 9 Sep 2014 08:56:48 +0800 Subject: usb: gadget: add reset API at usb_gadget_driver Adding a reset API for the UDC bus reset handler is useful for two reasons. First, the current disconnect API at usb_gadget_driver is also invoked by the UDC's bus reset handler, although the documentation says it is invoked when the host is disconnected. Second, we may expect the gadget driver to do different things when the host sends a bus reset and when the host disconnects the gadget; e.g., we may not want to flush dirty pages for mass storage at bus reset, but do want to at disconnection.
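With the new callback, a gadget driver can react differently to the two events, roughly like this (hypothetical driver sketch; only the relevant callbacks are shown):

static void my_gadget_reset(struct usb_gadget *gadget)
{
        /* Host will re-enumerate shortly: re-arm endpoints, keep state. */
}

static void my_gadget_disconnect(struct usb_gadget *gadget)
{
        /* Host is really gone: e.g. flush dirty pages for mass storage. */
}

static struct usb_gadget_driver my_gadget_driver = {
        /* .function, .bind, .setup, ... elided */
        .reset          = my_gadget_reset,
        .disconnect     = my_gadget_disconnect,
};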
Acked-by: Alan Stern Signed-off-by: Peter Chen Signed-off-by: Felipe Balbi --- include/linux/usb/gadget.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index c540557b564b..598a6e9b2850 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -817,6 +817,8 @@ static inline int usb_gadget_disconnect(struct usb_gadget *gadget) * Called in a context that permits sleeping. * @suspend: Invoked on USB suspend. May be called in_interrupt. * @resume: Invoked on USB resume. May be called in_interrupt. + * @reset: Invoked on USB bus reset. It is mandatory for all gadget drivers + * and should be called in_interrupt. * @driver: Driver model state for this driver. * * Devices are disabled till a gadget driver successfully bind()s, which @@ -874,6 +876,7 @@ struct usb_gadget_driver { void (*disconnect)(struct usb_gadget *); void (*suspend)(struct usb_gadget *); void (*resume)(struct usb_gadget *); + void (*reset)(struct usb_gadget *); /* FIXME support safe rmmod */ struct device_driver driver; -- cgit v1.2.3 From 02ab695bb37ee9ad515df0d0790d5977505dd04a Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 4 Sep 2014 22:17:17 -0700 Subject: net: filter: add "load 64-bit immediate" eBPF instruction add BPF_LD_IMM64 instruction to load 64-bit immediate value into a register. All previous instructions were 8-byte. This is first 16-byte instruction. Two consecutive 'struct bpf_insn' blocks are interpreted as single instruction: insn[0].code = BPF_LD | BPF_DW | BPF_IMM insn[0].dst_reg = destination register insn[0].imm = lower 32-bit insn[1].code = 0 insn[1].imm = upper 32-bit All unused fields must be zero. Classic BPF has similar instruction: BPF_LD | BPF_W | BPF_IMM which loads 32-bit immediate value into a register. x64 JITs it as single 'movabsq %rax, imm64' arm64 may JIT as sequence of four 'movk x0, #imm16, lsl #shift' insn Note that old eBPF programs are binary compatible with new interpreter. It helps eBPF programs load 64-bit constant into a register with one instruction instead of using two registers and 4 instructions: BPF_MOV32_IMM(R1, imm32) BPF_ALU64_IMM(BPF_LSH, R1, 32) BPF_MOV32_IMM(R2, imm32) BPF_ALU64_REG(BPF_OR, R1, R2) User space generated programs will use this instruction to load constants only. To tell kernel that user space needs a pointer the _pseudo_ variant of this instruction may be added later, which will use extra bits of encoding to indicate what type of pointer user space is asking kernel to provide. For example 'off' or 'src_reg' fields can be used for such purpose. src_reg = 1 could mean that user space is asking kernel to validate and load in-kernel map pointer. src_reg = 2 could mean that user space needs readonly data section pointer src_reg = 3 could mean that user space needs a pointer to per-cpu local data All such future pseudo instructions will not be carrying the actual pointer as part of the instruction, but rather will be treated as a request to kernel to provide one. The kernel will verify the request_for_a_pointer, then will drop _pseudo_ marking and will store actual internal pointer inside the instruction, so the end result is the interpreter and JITs never see pseudo BPF_LD_IMM64 insns and only operate on generic BPF_LD_IMM64 that loads 64-bit immediate into a register. User space never operates on direct pointers and verifier can easily recognize request_for_pointer vs other instructions. 
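In sketch form, loading a 64-bit constant with the BPF_LD_IMM64 macro added below occupies two struct bpf_insn slots but reads as one instruction (register and constant as used in the lib/test_bpf.c test case):

struct bpf_insn prog[] = {
        /* Slot 0 carries BPF_LD | BPF_DW | BPF_IMM, the destination
         * register and the low 32 bits of the immediate; slot 1 has
         * code == 0 (reserved opcode) and the high 32 bits in imm.
         */
        BPF_LD_IMM64(BPF_REG_1, 0x567800001234LL),
        BPF_EXIT_INSN(),
};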
Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- Documentation/networking/filter.txt | 8 +++++++- arch/x86/net/bpf_jit_comp.c | 17 +++++++++++++++++ include/linux/filter.h | 18 ++++++++++++++++++ kernel/bpf/core.c | 5 +++++ lib/test_bpf.c | 21 +++++++++++++++++++++ 5 files changed, 68 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index c48a9704bda8..81916ab5d96f 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -951,7 +951,7 @@ Size modifier is one of ... Mode modifier is one of: - BPF_IMM 0x00 /* classic BPF only, reserved in eBPF */ + BPF_IMM 0x00 /* used for 32-bit mov in classic BPF and 64-bit in eBPF */ BPF_ABS 0x20 BPF_IND 0x40 BPF_MEM 0x60 @@ -995,6 +995,12 @@ BPF_XADD | BPF_DW | BPF_STX: lock xadd *(u64 *)(dst_reg + off16) += src_reg Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW. Note that 1 and 2 byte atomic increments are not supported. +eBPF has one 16-byte instruction: BPF_LD | BPF_DW | BPF_IMM which consists +of two consecutive 'struct bpf_insn' 8-byte blocks and interpreted as single +instruction that loads 64-bit immediate value into a dst_reg. +Classic BPF has similar instruction: BPF_LD | BPF_W | BPF_IMM which loads +32-bit immediate value into a register. + Testing ------- diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 39ccfbb4a723..06f8c17f5484 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -393,6 +393,23 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, EMIT1_off32(add_1reg(0xB8, dst_reg), imm32); break; + case BPF_LD | BPF_IMM | BPF_DW: + if (insn[1].code != 0 || insn[1].src_reg != 0 || + insn[1].dst_reg != 0 || insn[1].off != 0) { + /* verifier must catch invalid insns */ + pr_err("invalid BPF_LD_IMM64 insn\n"); + return -EINVAL; + } + + /* movabsq %rax, imm64 */ + EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg)); + EMIT(insn[0].imm, 4); + EMIT(insn[1].imm, 4); + + insn++; + i++; + break; + /* dst %= src, dst /= src, dst %= imm32, dst /= imm32 */ case BPF_ALU | BPF_MOD | BPF_X: case BPF_ALU | BPF_DIV | BPF_X: diff --git a/include/linux/filter.h b/include/linux/filter.h index c78994593355..bf323da77950 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -166,6 +166,24 @@ enum { .off = 0, \ .imm = IMM }) +/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */ +#define BPF_LD_IMM64(DST, IMM) \ + BPF_LD_IMM64_RAW(DST, 0, IMM) + +#define BPF_LD_IMM64_RAW(DST, SRC, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_LD | BPF_DW | BPF_IMM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = (__u32) (IMM) }), \ + ((struct bpf_insn) { \ + .code = 0, /* zero is reserved opcode */ \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = ((__u64) (IMM)) >> 32 }) + /* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */ #define BPF_MOV64_RAW(TYPE, DST, SRC, IMM) \ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index b54bb2c2e494..2c2bfaacce66 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -242,6 +242,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) [BPF_LD | BPF_IND | BPF_W] = &&LD_IND_W, [BPF_LD | BPF_IND | BPF_H] = &&LD_IND_H, [BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B, + [BPF_LD | BPF_IMM | BPF_DW] = &&LD_IMM_DW, }; void *ptr; int off; @@ -301,6 +302,10 @@ select_insn: ALU64_MOV_K: DST = IMM; CONT; + 
LD_IMM_DW: + DST = (u64) (u32) insn[0].imm | ((u64) (u32) insn[1].imm) << 32; + insn++; + CONT; ALU64_ARSH_X: (*(s64 *) &DST) >>= SRC; CONT; diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 9a67456ba29a..413890815d3e 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -1735,6 +1735,27 @@ static struct bpf_test tests[] = { { }, { { 1, 0 } }, }, + { + "load 64-bit immediate", + .u.insns_int = { + BPF_LD_IMM64(R1, 0x567800001234L), + BPF_MOV64_REG(R2, R1), + BPF_MOV64_REG(R3, R2), + BPF_ALU64_IMM(BPF_RSH, R2, 32), + BPF_ALU64_IMM(BPF_LSH, R3, 32), + BPF_ALU64_IMM(BPF_RSH, R3, 32), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_JMP_IMM(BPF_JEQ, R2, 0x5678, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, R3, 0x1234, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, }; static struct net_device dev; -- cgit v1.2.3 From daedfb22451dd02b35c0549566cbb7cc06bdd53b Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 4 Sep 2014 22:17:18 -0700 Subject: net: filter: split filter.h and expose eBPF to user space allow user space to generate eBPF programs uapi/linux/bpf.h: eBPF instruction set definition linux/filter.h: the rest This patch only moves macro definitions, but practically it freezes existing eBPF instruction set, though new instructions can still be added in the future. These eBPF definitions cannot go into uapi/linux/filter.h, since the names may conflict with existing applications. Full eBPF ISA description is in Documentation/networking/filter.txt Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/filter.h | 56 +--------------------------------------- include/uapi/linux/Kbuild | 1 + include/uapi/linux/bpf.h | 65 +++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 67 insertions(+), 55 deletions(-) create mode 100644 include/uapi/linux/bpf.h (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index bf323da77950..8f82ef3f1cdd 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -10,58 +10,12 @@ #include #include #include +#include struct sk_buff; struct sock; struct seccomp_data; -/* Internally used and optimized filter representation with extended - * instruction set based on top of classic BPF. - */ - -/* instruction classes */ -#define BPF_ALU64 0x07 /* alu mode in double word width */ - -/* ld/ldx fields */ -#define BPF_DW 0x18 /* double word */ -#define BPF_XADD 0xc0 /* exclusive add */ - -/* alu/jmp fields */ -#define BPF_MOV 0xb0 /* mov reg to reg */ -#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */ - -/* change endianness of a register */ -#define BPF_END 0xd0 /* flags for endianness conversion: */ -#define BPF_TO_LE 0x00 /* convert to little-endian */ -#define BPF_TO_BE 0x08 /* convert to big-endian */ -#define BPF_FROM_LE BPF_TO_LE -#define BPF_FROM_BE BPF_TO_BE - -#define BPF_JNE 0x50 /* jump != */ -#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ -#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ -#define BPF_CALL 0x80 /* function call */ -#define BPF_EXIT 0x90 /* function return */ - -/* Register numbers */ -enum { - BPF_REG_0 = 0, - BPF_REG_1, - BPF_REG_2, - BPF_REG_3, - BPF_REG_4, - BPF_REG_5, - BPF_REG_6, - BPF_REG_7, - BPF_REG_8, - BPF_REG_9, - BPF_REG_10, - __MAX_BPF_REG, -}; - -/* BPF has 10 general purpose 64-bit registers and stack frame. */ -#define MAX_BPF_REG __MAX_BPF_REG - /* ArgX, context and stack frame pointer register positions. 
Note, * Arg1, Arg2, Arg3, etc are used as argument mappings of function * calls in BPF_CALL instruction. @@ -322,14 +276,6 @@ enum { #define SK_RUN_FILTER(filter, ctx) \ (*filter->prog->bpf_func)(ctx, filter->prog->insnsi) -struct bpf_insn { - __u8 code; /* opcode */ - __u8 dst_reg:4; /* dest register */ - __u8 src_reg:4; /* source register */ - __s16 off; /* signed offset */ - __s32 imm; /* signed immediate constant */ -}; - #ifdef CONFIG_COMPAT /* A struct sock_filter is architecture independent. */ struct compat_sock_fprog { diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index 24e9033f8b3f..fb3f7b675229 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -67,6 +67,7 @@ header-y += bfs_fs.h header-y += binfmts.h header-y += blkpg.h header-y += blktrace_api.h +header-y += bpf.h header-y += bpqether.h header-y += bsg.h header-y += btrfs.h diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h new file mode 100644 index 000000000000..479ed0b6be16 --- /dev/null +++ b/include/uapi/linux/bpf.h @@ -0,0 +1,65 @@ +/* Copyright (c) 2011-2014 PLUMgrid, http://plumgrid.com + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + */ +#ifndef _UAPI__LINUX_BPF_H__ +#define _UAPI__LINUX_BPF_H__ + +#include + +/* Extended instruction set based on top of classic BPF */ + +/* instruction classes */ +#define BPF_ALU64 0x07 /* alu mode in double word width */ + +/* ld/ldx fields */ +#define BPF_DW 0x18 /* double word */ +#define BPF_XADD 0xc0 /* exclusive add */ + +/* alu/jmp fields */ +#define BPF_MOV 0xb0 /* mov reg to reg */ +#define BPF_ARSH 0xc0 /* sign extending arithmetic shift right */ + +/* change endianness of a register */ +#define BPF_END 0xd0 /* flags for endianness conversion: */ +#define BPF_TO_LE 0x00 /* convert to little-endian */ +#define BPF_TO_BE 0x08 /* convert to big-endian */ +#define BPF_FROM_LE BPF_TO_LE +#define BPF_FROM_BE BPF_TO_BE + +#define BPF_JNE 0x50 /* jump != */ +#define BPF_JSGT 0x60 /* SGT is signed '>', GT in x86 */ +#define BPF_JSGE 0x70 /* SGE is signed '>=', GE in x86 */ +#define BPF_CALL 0x80 /* function call */ +#define BPF_EXIT 0x90 /* function return */ + +/* Register numbers */ +enum { + BPF_REG_0 = 0, + BPF_REG_1, + BPF_REG_2, + BPF_REG_3, + BPF_REG_4, + BPF_REG_5, + BPF_REG_6, + BPF_REG_7, + BPF_REG_8, + BPF_REG_9, + BPF_REG_10, + __MAX_BPF_REG, +}; + +/* BPF has 10 general purpose 64-bit registers and stack frame. */ +#define MAX_BPF_REG __MAX_BPF_REG + +struct bpf_insn { + __u8 code; /* opcode */ + __u8 dst_reg:4; /* dest register */ + __u8 src_reg:4; /* source register */ + __s16 off; /* signed offset */ + __s32 imm; /* signed immediate constant */ +}; + +#endif /* _UAPI__LINUX_BPF_H__ */ -- cgit v1.2.3 From dc8ecdd3a3fccf73fcb07711cde064ce5727f9d1 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Mon, 1 Sep 2014 23:11:06 +0200 Subject: bcma: move bus struct setup into early part of host specific code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This change is important for SoC host. In future we will want to know chip ID (needed for early MIPS boot) before doing cores scanning. Signed-off-by: Rafał Miłecki Acked-by: Hauke Mehrtens Signed-off-by: John W. 
Linville --- drivers/bcma/host_pci.c | 3 +++ drivers/bcma/host_soc.c | 3 +++ drivers/bcma/main.c | 2 -- drivers/bcma/scan.c | 7 ------- include/linux/bcma/bcma.h | 1 - 5 files changed, 6 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/drivers/bcma/host_pci.c b/drivers/bcma/host_pci.c index f032ed6dd459..1e5ac0a79696 100644 --- a/drivers/bcma/host_pci.c +++ b/drivers/bcma/host_pci.c @@ -208,6 +208,9 @@ static int bcma_host_pci_probe(struct pci_dev *dev, bus->boardinfo.vendor = bus->host_pci->subsystem_vendor; bus->boardinfo.type = bus->host_pci->subsystem_device; + /* Initialize struct, detect chip */ + bcma_init_bus(bus); + /* Register */ err = bcma_bus_register(bus); if (err) diff --git a/drivers/bcma/host_soc.c b/drivers/bcma/host_soc.c index 1edd7e064621..379e0d4ebe72 100644 --- a/drivers/bcma/host_soc.c +++ b/drivers/bcma/host_soc.c @@ -178,6 +178,9 @@ int __init bcma_host_soc_register(struct bcma_soc *soc) bus->hosttype = BCMA_HOSTTYPE_SOC; bus->ops = &bcma_host_soc_ops; + /* Initialize struct, detect chip */ + bcma_init_bus(bus); + /* Register */ err = bcma_bus_early_register(bus, &soc->core_cc, &soc->core_mips); if (err) diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c index 0ff8d58831ef..9f6b0cb9a12c 100644 --- a/drivers/bcma/main.c +++ b/drivers/bcma/main.c @@ -334,8 +334,6 @@ int __init bcma_bus_early_register(struct bcma_bus *bus, struct bcma_device *core; struct bcma_device_id match; - bcma_init_bus(bus); - match.manuf = BCMA_MANUF_BCM; match.id = bcma_cc_core_id(bus); match.class = BCMA_CL_SIM; diff --git a/drivers/bcma/scan.c b/drivers/bcma/scan.c index e9bd77249a4c..e2b990303042 100644 --- a/drivers/bcma/scan.c +++ b/drivers/bcma/scan.c @@ -438,9 +438,6 @@ void bcma_init_bus(struct bcma_bus *bus) s32 tmp; struct bcma_chipinfo *chipinfo = &(bus->chipinfo); - if (bus->init_done) - return; - INIT_LIST_HEAD(&bus->cores); bus->nr_cores = 0; @@ -452,8 +449,6 @@ void bcma_init_bus(struct bcma_bus *bus) chipinfo->pkg = (tmp & BCMA_CC_ID_PKG) >> BCMA_CC_ID_PKG_SHIFT; bcma_info(bus, "Found chip with id 0x%04X, rev 0x%02X and package 0x%02X\n", chipinfo->id, chipinfo->rev, chipinfo->pkg); - - bus->init_done = true; } int bcma_bus_scan(struct bcma_bus *bus) @@ -463,8 +458,6 @@ int bcma_bus_scan(struct bcma_bus *bus) int err, core_num = 0; - bcma_init_bus(bus); - erombase = bcma_scan_read32(bus, 0, BCMA_CC_EROM); if (bus->hosttype == BCMA_HOSTTYPE_SOC) { eromptr = ioremap_nocache(erombase, BCMA_CORE_SIZE); diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 0272e49135d0..c1ba87d1548e 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -332,7 +332,6 @@ struct bcma_bus { struct bcma_device *mapped_core; struct list_head cores; u8 nr_cores; - u8 init_done:1; u8 num; struct bcma_drv_cc drv_cc; -- cgit v1.2.3 From a395135ddebb0a06052b84c309eb6cb68b79c797 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Mon, 1 Sep 2014 23:11:07 +0200 Subject: bcma: use separated function to initialize bus on SoC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is required to split SoC bus init into two phases. The later one (which includes scanning) should be called when kalloc is available. Cc: Ralf Baechle Signed-off-by: Rafał Miłecki Acked-by: Hauke Mehrtens Signed-off-by: John W. 
Linville --- arch/mips/bcm47xx/setup.c | 4 ++++ drivers/bcma/host_soc.c | 11 +++++++++-- include/linux/bcma/bcma_soc.h | 1 + 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c index 2b63e7e7d3d3..fff6ed4978c7 100644 --- a/arch/mips/bcm47xx/setup.c +++ b/arch/mips/bcm47xx/setup.c @@ -201,6 +201,10 @@ static void __init bcm47xx_register_bcma(void) pr_warn("bcm47xx: someone else already registered a bcma SPROM callback handler (err %d)\n", err); err = bcma_host_soc_register(&bcm47xx_bus.bcma); + if (err) + panic("Failed to register BCMA bus (err %d)", err); + + err = bcma_host_soc_init(&bcm47xx_bus.bcma); if (err) panic("Failed to initialize BCMA bus (err %d)", err); diff --git a/drivers/bcma/host_soc.c b/drivers/bcma/host_soc.c index 379e0d4ebe72..718e054dd727 100644 --- a/drivers/bcma/host_soc.c +++ b/drivers/bcma/host_soc.c @@ -165,7 +165,6 @@ static const struct bcma_host_ops bcma_host_soc_ops = { int __init bcma_host_soc_register(struct bcma_soc *soc) { struct bcma_bus *bus = &soc->bus; - int err; /* iomap only first core. We have to read some register on this core * to scan the bus. @@ -181,7 +180,15 @@ int __init bcma_host_soc_register(struct bcma_soc *soc) /* Initialize struct, detect chip */ bcma_init_bus(bus); - /* Register */ + return 0; +} + +int __init bcma_host_soc_init(struct bcma_soc *soc) +{ + struct bcma_bus *bus = &soc->bus; + int err; + + /* Scan bus and initialize it */ err = bcma_bus_early_register(bus, &soc->core_cc, &soc->core_mips); if (err) iounmap(bus->mmio); diff --git a/include/linux/bcma/bcma_soc.h b/include/linux/bcma/bcma_soc.h index 4203c5593b9f..f24d245f8394 100644 --- a/include/linux/bcma/bcma_soc.h +++ b/include/linux/bcma/bcma_soc.h @@ -10,6 +10,7 @@ struct bcma_soc { }; int __init bcma_host_soc_register(struct bcma_soc *soc); +int __init bcma_host_soc_init(struct bcma_soc *soc); int bcma_bus_register(struct bcma_bus *bus); -- cgit v1.2.3 From 23a2f39c8f4035eade7f226eb7ada30c78d9eee3 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Mon, 8 Sep 2014 22:53:35 +0200 Subject: bcma: store more alternative addresses Each core can have more than one alternative address; there are cores with 8 alternative addresses for different functions. The PHY control in the ChipCommon B core is done through the second alternative address, not the first one. Signed-off-by: Hauke Mehrtens CC: linux-usb@vger.kernel.org Signed-off-by: John W.
Linville --- drivers/bcma/scan.c | 9 +++++---- drivers/usb/host/bcma-hcd.c | 2 +- include/linux/bcma/bcma.h | 2 +- 3 files changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/bcma/scan.c b/drivers/bcma/scan.c index e2b990303042..06be7282cbc4 100644 --- a/drivers/bcma/scan.c +++ b/drivers/bcma/scan.c @@ -276,7 +276,7 @@ static int bcma_get_next_core(struct bcma_bus *bus, u32 __iomem **eromptr, struct bcma_device *core) { u32 tmp; - u8 i, j; + u8 i, j, k; s32 cia, cib; u8 ports[2], wrappers[2]; @@ -367,6 +367,7 @@ static int bcma_get_next_core(struct bcma_bus *bus, u32 __iomem **eromptr, core->addr = tmp; /* get & parse slave ports */ + k = 0; for (i = 0; i < ports[1]; i++) { for (j = 0; ; j++) { tmp = bcma_erom_get_addr_desc(bus, eromptr, @@ -376,9 +377,9 @@ static int bcma_get_next_core(struct bcma_bus *bus, u32 __iomem **eromptr, /* pr_debug("erom: slave port %d " * "has %d descriptors\n", i, j); */ break; - } else { - if (i == 0 && j == 0) - core->addr1 = tmp; + } else if (k < ARRAY_SIZE(core->addr_s)) { + core->addr_s[k] = tmp; + k++; } } } diff --git a/drivers/usb/host/bcma-hcd.c b/drivers/usb/host/bcma-hcd.c index 205f4a336583..cd6d0afb6b8f 100644 --- a/drivers/usb/host/bcma-hcd.c +++ b/drivers/usb/host/bcma-hcd.c @@ -237,7 +237,7 @@ static int bcma_hcd_probe(struct bcma_device *dev) bcma_hcd_init_chip(dev); /* In AI chips EHCI is addrspace 0, OHCI is 1 */ - ohci_addr = dev->addr1; + ohci_addr = dev->addr_s[0]; if ((chipinfo->id == 0x5357 || chipinfo->id == 0x4749) && chipinfo->rev == 0) ohci_addr = 0x18009000; diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index c1ba87d1548e..7fc16c991291 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -267,7 +267,7 @@ struct bcma_device { u8 core_unit; u32 addr; - u32 addr1; + u32 addr_s[8]; u32 wrap; void __iomem *io_addr; -- cgit v1.2.3 From 1716bcf3f76fe71e98d4851a3eb73ea3d93d4773 Mon Sep 17 00:00:00 2001 From: Hauke Mehrtens Date: Mon, 8 Sep 2014 22:53:36 +0200 Subject: bcma: add support for chipcommon B core This core is used on BCM4708 to configure the PCIe and USB3 PHYs and it contains the addresses to the Device Management unit. This will be used by the PCIe driver first. Signed-off-by: Hauke Mehrtens Signed-off-by: John W. 
Linville --- drivers/bcma/Makefile | 1 + drivers/bcma/bcma_private.h | 4 ++ drivers/bcma/driver_chipcommon_b.c | 61 +++++++++++++++++++++++++++++ drivers/bcma/main.c | 10 +++++ drivers/bcma/scan.c | 1 + include/linux/bcma/bcma.h | 1 + include/linux/bcma/bcma_driver_chipcommon.h | 8 ++++ 7 files changed, 86 insertions(+) create mode 100644 drivers/bcma/driver_chipcommon_b.c (limited to 'include/linux') diff --git a/drivers/bcma/Makefile b/drivers/bcma/Makefile index 91290f7f61b8..838b4b9d352f 100644 --- a/drivers/bcma/Makefile +++ b/drivers/bcma/Makefile @@ -1,5 +1,6 @@ bcma-y += main.o scan.o core.o sprom.o bcma-y += driver_chipcommon.o driver_chipcommon_pmu.o +bcma-y += driver_chipcommon_b.o bcma-$(CONFIG_BCMA_SFLASH) += driver_chipcommon_sflash.o bcma-$(CONFIG_BCMA_NFLASH) += driver_chipcommon_nflash.o bcma-y += driver_pci.o diff --git a/drivers/bcma/bcma_private.h b/drivers/bcma/bcma_private.h index 09b632ad0fe2..b40be43c6f31 100644 --- a/drivers/bcma/bcma_private.h +++ b/drivers/bcma/bcma_private.h @@ -50,6 +50,10 @@ void bcma_chipco_serial_init(struct bcma_drv_cc *cc); extern struct platform_device bcma_pflash_dev; #endif /* CONFIG_BCMA_DRIVER_MIPS */ +/* driver_chipcommon_b.c */ +int bcma_core_chipcommon_b_init(struct bcma_drv_cc_b *ccb); +void bcma_core_chipcommon_b_free(struct bcma_drv_cc_b *ccb); + /* driver_chipcommon_pmu.c */ u32 bcma_pmu_get_alp_clock(struct bcma_drv_cc *cc); u32 bcma_pmu_get_cpu_clock(struct bcma_drv_cc *cc); diff --git a/drivers/bcma/driver_chipcommon_b.c b/drivers/bcma/driver_chipcommon_b.c new file mode 100644 index 000000000000..c20b5f4ff290 --- /dev/null +++ b/drivers/bcma/driver_chipcommon_b.c @@ -0,0 +1,61 @@ +/* + * Broadcom specific AMBA + * ChipCommon B Unit driver + * + * Copyright 2014, Hauke Mehrtens + * + * Licensed under the GNU/GPL. See COPYING for details. 
+ */ + +#include "bcma_private.h" +#include +#include + +static bool bcma_wait_reg(struct bcma_bus *bus, void __iomem *addr, u32 mask, + u32 value, int timeout) +{ + unsigned long deadline = jiffies + timeout; + u32 val; + + do { + val = readl(addr); + if ((val & mask) == value) + return true; + cpu_relax(); + udelay(10); + } while (!time_after_eq(jiffies, deadline)); + + bcma_err(bus, "Timeout waiting for register %p\n", addr); + + return false; +} + +void bcma_chipco_b_mii_write(struct bcma_drv_cc_b *ccb, u32 offset, u32 value) +{ + struct bcma_bus *bus = ccb->core->bus; + + writel(offset, ccb->mii + 0x00); + bcma_wait_reg(bus, ccb->mii + 0x00, 0x0100, 0x0000, 100); + writel(value, ccb->mii + 0x04); + bcma_wait_reg(bus, ccb->mii + 0x00, 0x0100, 0x0000, 100); +} +EXPORT_SYMBOL_GPL(bcma_chipco_b_mii_write); + +int bcma_core_chipcommon_b_init(struct bcma_drv_cc_b *ccb) +{ + if (ccb->setup_done) + return 0; + + ccb->setup_done = 1; + ccb->mii = ioremap_nocache(ccb->core->addr_s[1], BCMA_CORE_SIZE); + if (!ccb->mii) + return -ENOMEM; + + return 0; +} + +void bcma_core_chipcommon_b_free(struct bcma_drv_cc_b *ccb) +{ + if (ccb->mii) + iounmap(ccb->mii); +} diff --git a/drivers/bcma/main.c b/drivers/bcma/main.c index 297a2d46985a..c421403cab43 100644 --- a/drivers/bcma/main.c +++ b/drivers/bcma/main.c @@ -173,6 +173,7 @@ static int bcma_register_devices(struct bcma_bus *bus) switch (core->id.id) { case BCMA_CORE_4706_CHIPCOMMON: case BCMA_CORE_CHIPCOMMON: + case BCMA_CORE_NS_CHIPCOMMON_B: case BCMA_CORE_PCI: case BCMA_CORE_PCIE: case BCMA_CORE_PCIE2: @@ -287,6 +288,13 @@ int bcma_bus_register(struct bcma_bus *bus) bcma_core_chipcommon_init(&bus->drv_cc); } + /* Init CC core */ + core = bcma_find_core(bus, BCMA_CORE_NS_CHIPCOMMON_B); + if (core) { + bus->drv_cc_b.core = core; + bcma_core_chipcommon_b_init(&bus->drv_cc_b); + } + /* Init MIPS core */ core = bcma_find_core(bus, BCMA_CORE_MIPS_74K); if (core) { @@ -341,6 +349,8 @@ void bcma_bus_unregister(struct bcma_bus *bus) else if (err) bcma_err(bus, "Can not unregister GPIO driver: %i\n", err); + bcma_core_chipcommon_b_free(&bus->drv_cc_b); + cores[0] = bcma_find_core(bus, BCMA_CORE_MIPS_74K); cores[1] = bcma_find_core(bus, BCMA_CORE_PCIE); cores[2] = bcma_find_core(bus, BCMA_CORE_4706_MAC_GBIT_COMMON); diff --git a/drivers/bcma/scan.c b/drivers/bcma/scan.c index 06be7282cbc4..b3a403c136fb 100644 --- a/drivers/bcma/scan.c +++ b/drivers/bcma/scan.c @@ -314,6 +314,7 @@ static int bcma_get_next_core(struct bcma_bus *bus, u32 __iomem **eromptr, /* Some specific cores don't need wrappers */ switch (core->id.id) { case BCMA_CORE_4706_MAC_GBIT_COMMON: + case BCMA_CORE_NS_CHIPCOMMON_B: /* Not used yet: case BCMA_CORE_OOB_ROUTER: */ break; default: diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 7fc16c991291..634597917670 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -335,6 +335,7 @@ struct bcma_bus { u8 num; struct bcma_drv_cc drv_cc; + struct bcma_drv_cc_b drv_cc_b; struct bcma_drv_pci drv_pci[2]; struct bcma_drv_pcie2 drv_pcie2; struct bcma_drv_mips drv_mips; diff --git a/include/linux/bcma/bcma_driver_chipcommon.h b/include/linux/bcma/bcma_driver_chipcommon.h index 63d105cd14a3..db6fa217f98b 100644 --- a/include/linux/bcma/bcma_driver_chipcommon.h +++ b/include/linux/bcma/bcma_driver_chipcommon.h @@ -644,6 +644,12 @@ struct bcma_drv_cc { #endif }; +struct bcma_drv_cc_b { + struct bcma_device *core; + u8 setup_done:1; + void __iomem *mii; +}; + /* Register access */ #define bcma_cc_read32(cc, 
offset) \ bcma_read32((cc)->core, offset) @@ -699,4 +705,6 @@ extern void bcma_pmu_spuravoid_pllupdate(struct bcma_drv_cc *cc, int spuravoid); extern u32 bcma_pmu_get_bus_clock(struct bcma_drv_cc *cc); +void bcma_chipco_b_mii_write(struct bcma_drv_cc_b *ccb, u32 offset, u32 value); + #endif /* LINUX_BCMA_DRIVER_CC_H_ */ -- cgit v1.2.3 From d0449b90f80f263e17e8b3ce31442e45121dc46c Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 22 Aug 2014 10:18:42 -0400 Subject: locks: Remove unused conf argument from lm_grant This argument is always NULL so don't pass it around. [jlayton: remove dependencies on previous patches in series] Signed-off-by: Joe Perches Signed-off-by: Jeff Layton --- fs/dlm/plock.c | 8 ++++---- fs/lockd/svclock.c | 12 +++--------- include/linux/fs.h | 2 +- 3 files changed, 8 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/fs/dlm/plock.c b/fs/dlm/plock.c index f704458ea5f5..e0ab3a93eeff 100644 --- a/fs/dlm/plock.c +++ b/fs/dlm/plock.c @@ -30,7 +30,7 @@ struct plock_op { struct plock_xop { struct plock_op xop; - void *callback; + int (*callback)(struct file_lock *fl, int result); void *fl; void *file; struct file_lock flc; @@ -190,7 +190,7 @@ static int dlm_plock_callback(struct plock_op *op) struct file *file; struct file_lock *fl; struct file_lock *flc; - int (*notify)(void *, void *, int) = NULL; + int (*notify)(struct file_lock *fl, int result) = NULL; struct plock_xop *xop = (struct plock_xop *)op; int rv = 0; @@ -209,7 +209,7 @@ static int dlm_plock_callback(struct plock_op *op) notify = xop->callback; if (op->info.rv) { - notify(fl, NULL, op->info.rv); + notify(fl, op->info.rv); goto out; } @@ -228,7 +228,7 @@ static int dlm_plock_callback(struct plock_op *op) (unsigned long long)op->info.number, file, fl); } - rv = notify(fl, NULL, 0); + rv = notify(fl, 0); if (rv) { /* XXX: We need to cancel the fs lock here: */ log_print("dlm_plock_callback: lock granted after lock request " diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index ab798a88ec1d..2a6170133c1d 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -667,22 +667,16 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l * deferred rpc for GETLK and SETLK. 
*/ static void -nlmsvc_update_deferred_block(struct nlm_block *block, struct file_lock *conf, - int result) +nlmsvc_update_deferred_block(struct nlm_block *block, int result) { block->b_flags |= B_GOT_CALLBACK; if (result == 0) block->b_granted = 1; else block->b_flags |= B_TIMED_OUT; - if (conf) { - if (block->b_fl) - __locks_copy_lock(block->b_fl, conf); - } } -static int nlmsvc_grant_deferred(struct file_lock *fl, struct file_lock *conf, - int result) +static int nlmsvc_grant_deferred(struct file_lock *fl, int result) { struct nlm_block *block; int rc = -ENOENT; @@ -697,7 +691,7 @@ static int nlmsvc_grant_deferred(struct file_lock *fl, struct file_lock *conf, rc = -ENOLCK; break; } - nlmsvc_update_deferred_block(block, conf, result); + nlmsvc_update_deferred_block(block, result); } else if (result == 0) block->b_granted = 1; diff --git a/include/linux/fs.h b/include/linux/fs.h index 94187721ad41..908af4f81680 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -869,7 +869,7 @@ struct lock_manager_operations { int (*lm_compare_owner)(struct file_lock *, struct file_lock *); unsigned long (*lm_owner_key)(struct file_lock *); void (*lm_notify)(struct file_lock *); /* unblock callback */ - int (*lm_grant)(struct file_lock *, struct file_lock *, int); + int (*lm_grant)(struct file_lock *, int); void (*lm_break)(struct file_lock *); int (*lm_change)(struct file_lock **, int); }; -- cgit v1.2.3 From 3fe0fff18fe87c6a2179837de68d1174903c6367 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 22 Aug 2014 10:18:42 -0400 Subject: locks: Rename __locks_copy_lock() to locks_copy_conflock() Jeff advice, " Right now __locks_copy_lock is only used to copy conflocks. It would be good to rename that to something more distinct (i.e.locks_copy_conflock), to make it clear that we're generating a conflock there." v5: change order from 3/6 to 2/6 v4: new patch only renaming function name Signed-off-by: Kinglong Mee Signed-off-by: Jeff Layton --- fs/locks.c | 10 +++++----- include/linux/fs.h | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/locks.c b/fs/locks.c index bb08857f90b5..ec9becd02d3d 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -281,7 +281,7 @@ static void locks_copy_private(struct file_lock *new, struct file_lock *fl) /* * Initialize a new lock from an existing file_lock structure. 
*/ -void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl) +void locks_copy_conflock(struct file_lock *new, struct file_lock *fl) { new->fl_owner = fl->fl_owner; new->fl_pid = fl->fl_pid; @@ -293,14 +293,14 @@ void __locks_copy_lock(struct file_lock *new, const struct file_lock *fl) new->fl_ops = NULL; new->fl_lmops = NULL; } -EXPORT_SYMBOL(__locks_copy_lock); +EXPORT_SYMBOL(locks_copy_conflock); void locks_copy_lock(struct file_lock *new, struct file_lock *fl) { /* "new" must be a freshly-initialized lock */ WARN_ON_ONCE(new->fl_ops); - __locks_copy_lock(new, fl); + locks_copy_conflock(new, fl); new->fl_file = fl->fl_file; new->fl_ops = fl->fl_ops; new->fl_lmops = fl->fl_lmops; @@ -735,7 +735,7 @@ posix_test_lock(struct file *filp, struct file_lock *fl) break; } if (cfl) { - __locks_copy_lock(fl, cfl); + locks_copy_conflock(fl, cfl); if (cfl->fl_nspid) fl->fl_pid = pid_vnr(cfl->fl_nspid); } else @@ -941,7 +941,7 @@ static int __posix_lock_file(struct inode *inode, struct file_lock *request, str if (!posix_locks_conflict(request, fl)) continue; if (conflock) - __locks_copy_lock(conflock, fl); + locks_copy_conflock(conflock, fl); error = -EAGAIN; if (!(request->fl_flags & FL_SLEEP)) goto out; diff --git a/include/linux/fs.h b/include/linux/fs.h index 908af4f81680..5ab86f44b697 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -966,7 +966,7 @@ void locks_free_lock(struct file_lock *fl); extern void locks_init_lock(struct file_lock *); extern struct file_lock * locks_alloc_lock(void); extern void locks_copy_lock(struct file_lock *, struct file_lock *); -extern void __locks_copy_lock(struct file_lock *, const struct file_lock *); +extern void locks_copy_conflock(struct file_lock *, struct file_lock *); extern void locks_remove_posix(struct file *, fl_owner_t); extern void locks_remove_file(struct file *); extern void locks_release_private(struct file_lock *); @@ -1026,7 +1026,7 @@ static inline void locks_init_lock(struct file_lock *fl) return; } -static inline void __locks_copy_lock(struct file_lock *new, struct file_lock *fl) +static inline void locks_copy_conflock(struct file_lock *new, struct file_lock *fl) { return; } -- cgit v1.2.3 From 5c97d7b1479982a48cf2129062b880c2555049ac Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 22 Aug 2014 10:18:43 -0400 Subject: locks: New ops in lock_manager_operations for get/put owner NFSD or another lock manager may take a reference on the lock owner, so add two new operations for copying and releasing the owner.
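A lock manager whose owner object is refcounted can then implement the pair roughly as follows (hypothetical sketch; struct my_owner, my_owner_release and my_lm_ops are made-up names):

struct my_owner {
        struct kref ref;
        /* ... lock-manager-specific owner state ... */
};

static void my_owner_release(struct kref *ref)
{
        kfree(container_of(ref, struct my_owner, ref));
}

static void my_lm_get_owner(struct file_lock *new, struct file_lock *fl)
{
        struct my_owner *owner = fl->fl_owner;

        kref_get(&owner->ref);          /* pin the owner for the copy */
        new->fl_owner = owner;
}

static void my_lm_put_owner(struct file_lock *fl)
{
        struct my_owner *owner = fl->fl_owner;

        fl->fl_owner = NULL;
        kref_put(&owner->ref, my_owner_release);
}

static const struct lock_manager_operations my_lm_ops = {
        .lm_get_owner   = my_lm_get_owner,
        .lm_put_owner   = my_lm_put_owner,
        /* .lm_notify, .lm_grant, ... as required */
};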
v5: change order from 2/6 to 3/6 v4: rename lm_copy_owner/lm_release_owner to lm_get_owner/lm_put_owner Reviewed-by: Jeff Layton Signed-off-by: Kinglong Mee Signed-off-by: Jeff Layton --- fs/locks.c | 12 ++++++++++-- include/linux/fs.h | 2 ++ 2 files changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/fs/locks.c b/fs/locks.c index ec9becd02d3d..5e83f3a99377 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -230,8 +230,12 @@ void locks_release_private(struct file_lock *fl) fl->fl_ops->fl_release_private(fl); fl->fl_ops = NULL; } - fl->fl_lmops = NULL; + if (fl->fl_lmops) { + if (fl->fl_lmops->lm_put_owner) + fl->fl_lmops->lm_put_owner(fl); + fl->fl_lmops = NULL; + } } EXPORT_SYMBOL_GPL(locks_release_private); @@ -274,8 +278,12 @@ static void locks_copy_private(struct file_lock *new, struct file_lock *fl) fl->fl_ops->fl_copy_lock(new, fl); new->fl_ops = fl->fl_ops; } - if (fl->fl_lmops) + + if (fl->fl_lmops) { + if (fl->fl_lmops->lm_get_owner) + fl->fl_lmops->lm_get_owner(new, fl); new->fl_lmops = fl->fl_lmops; + } } /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 5ab86f44b697..3b07ce2698de 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -868,6 +868,8 @@ struct file_lock_operations { struct lock_manager_operations { int (*lm_compare_owner)(struct file_lock *, struct file_lock *); unsigned long (*lm_owner_key)(struct file_lock *); + void (*lm_get_owner)(struct file_lock *, struct file_lock *); + void (*lm_put_owner)(struct file_lock *); void (*lm_notify)(struct file_lock *); /* unblock callback */ int (*lm_grant)(struct file_lock *, int); void (*lm_break)(struct file_lock *); -- cgit v1.2.3 From 09802fd2a8caea2a2147fca8d7975697c5de573d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 22 Aug 2014 10:18:44 -0400 Subject: lockd: rip out deferred lock handling from testlock codepath As Kinglong points out, the nlm_block->b_fl field is no longer used at all. Also, vfs_test_lock in the generic locking code will only return FILE_LOCK_DEFERRED if FL_SLEEP is set, and it isn't here. The only other place that returns that value is the DLM lock code, but it only does that in dlm_posix_lock, never in dlm_posix_get. Remove all of the deferred locking code from the testlock codepath since it doesn't appear to ever be used anyway. I do have a small concern that this might cause a behavior change in the case where you have a block already sitting on the list when the testlock request comes in, but that looks like it doesn't really work properly anyway. I think it's best to just pass that down to vfs_test_lock and let the filesystem report that instead of trying to infer what's going on with the lock by looking at an existing block. 
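Condensed, the testlock path after this patch becomes (sketch; the actual code assigns a __be32 status and jumps to a common exit label):

error = vfs_test_lock(file->f_file, &lock->fl);
if (error) {
        /* Deferred test requests are no longer handled here. */
        WARN_ON_ONCE(error == FILE_LOCK_DEFERRED);
        return nlm_lck_denied_nolocks;
}
if (lock->fl.fl_type == F_UNLCK)
        return nlm_granted;             /* no conflicting lock found */
/* otherwise copy the conflicting lock into 'conflock' and deny */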
Cc: cluster-devel@redhat.com Signed-off-by: Jeff Layton Reviewed-by: Kinglong Mee --- fs/lockd/svclock.c | 55 +++++---------------------------------------- include/linux/lockd/lockd.h | 1 - 2 files changed, 6 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/fs/lockd/svclock.c b/fs/lockd/svclock.c index acfa94d5b489..13db95f54176 100644 --- a/fs/lockd/svclock.c +++ b/fs/lockd/svclock.c @@ -245,7 +245,6 @@ nlmsvc_create_block(struct svc_rqst *rqstp, struct nlm_host *host, block->b_daemon = rqstp->rq_server; block->b_host = host; block->b_file = file; - block->b_fl = NULL; file->f_count++; /* Add to file's list of blocks */ @@ -295,7 +294,6 @@ static void nlmsvc_free_block(struct kref *kref) nlmsvc_freegrantargs(block->b_call); nlmsvc_release_call(block->b_call); nlm_release_file(block->b_file); - kfree(block->b_fl); kfree(block); } @@ -508,7 +506,6 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, struct nlm_host *host, struct nlm_lock *lock, struct nlm_lock *conflock, struct nlm_cookie *cookie) { - struct nlm_block *block = NULL; int error; __be32 ret; @@ -519,63 +516,26 @@ nlmsvc_testlock(struct svc_rqst *rqstp, struct nlm_file *file, (long long)lock->fl.fl_start, (long long)lock->fl.fl_end); - /* Get existing block (in case client is busy-waiting) */ - block = nlmsvc_lookup_block(file, lock); - - if (block == NULL) { - struct file_lock *conf = kzalloc(sizeof(*conf), GFP_KERNEL); - - if (conf == NULL) - return nlm_granted; - block = nlmsvc_create_block(rqstp, host, file, lock, cookie); - if (block == NULL) { - kfree(conf); - return nlm_granted; - } - block->b_fl = conf; - } - if (block->b_flags & B_QUEUED) { - dprintk("lockd: nlmsvc_testlock deferred block %p flags %d fl %p\n", - block, block->b_flags, block->b_fl); - if (block->b_flags & B_TIMED_OUT) { - nlmsvc_unlink_block(block); - ret = nlm_lck_denied; - goto out; - } - if (block->b_flags & B_GOT_CALLBACK) { - nlmsvc_unlink_block(block); - if (block->b_fl != NULL - && block->b_fl->fl_type != F_UNLCK) { - lock->fl = *block->b_fl; - goto conf_lock; - } else { - ret = nlm_granted; - goto out; - } - } - ret = nlm_drop_reply; - goto out; - } - if (locks_in_grace(SVC_NET(rqstp))) { ret = nlm_lck_denied_grace_period; goto out; } + error = vfs_test_lock(file->f_file, &lock->fl); - if (error == FILE_LOCK_DEFERRED) { - ret = nlmsvc_defer_lock_rqst(rqstp, block); - goto out; - } if (error) { + /* We can't currently deal with deferred test requests */ + if (error == FILE_LOCK_DEFERRED) + WARN_ON_ONCE(1); + ret = nlm_lck_denied_nolocks; goto out; } + if (lock->fl.fl_type == F_UNLCK) { ret = nlm_granted; goto out; } -conf_lock: dprintk("lockd: conflicting lock(ty=%d, %Ld-%Ld)\n", lock->fl.fl_type, (long long)lock->fl.fl_start, (long long)lock->fl.fl_end); @@ -589,8 +549,6 @@ conf_lock: locks_release_private(&lock->fl); ret = nlm_lck_denied; out: - if (block) - nlmsvc_release_block(block); return ret; } @@ -661,7 +619,6 @@ nlmsvc_cancel_blocked(struct net *net, struct nlm_file *file, struct nlm_lock *l * This is a callback from the filesystem for VFS file lock requests. * It will be used if lm_grant is defined and the filesystem can not * respond to the request immediately. - * For GETLK request it will copy the reply to the nlm_block. * For SETLK or SETLKW request it will get the local posix lock. 
* In all cases it will move the block to the head of nlm_blocked q where * nlmsvc_retry_blocked() can send back a reply for SETLKW or revisit the diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index 219d79627c05..ff82a32871b5 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -178,7 +178,6 @@ struct nlm_block { unsigned char b_granted; /* VFS granted lock */ struct nlm_file * b_file; /* file in question */ struct cache_req * b_cache_req; /* deferred request handling */ - struct file_lock * b_fl; /* set for GETLK */ struct cache_deferred_req * b_deferred_req; unsigned int b_flags; /* block flags */ #define B_QUEUED 1 /* lock queued */ -- cgit v1.2.3 From 699688a416524c3cea9eafaca69fc6c06c13c02e Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 22 Aug 2014 10:18:44 -0400 Subject: locks: remove lock_may_read and lock_may_write There are no callers of these functions. Signed-off-by: Jeff Layton --- fs/locks.c | 80 ------------------------------------------------------ include/linux/fs.h | 14 ---------- 2 files changed, 94 deletions(-) (limited to 'include/linux') diff --git a/fs/locks.c b/fs/locks.c index c3991dc80137..5200ffd2ba9b 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -2597,86 +2597,6 @@ static int __init proc_locks_init(void) module_init(proc_locks_init); #endif -/** - * lock_may_read - checks that the region is free of locks - * @inode: the inode that is being read - * @start: the first byte to read - * @len: the number of bytes to read - * - * Emulates Windows locking requirements. Whole-file - * mandatory locks (share modes) can prohibit a read and - * byte-range POSIX locks can prohibit a read if they overlap. - * - * N.B. this function is only ever called - * from knfsd and ownership of locks is never checked. - */ -int lock_may_read(struct inode *inode, loff_t start, unsigned long len) -{ - struct file_lock *fl; - int result = 1; - - spin_lock(&inode->i_lock); - for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { - if (IS_POSIX(fl)) { - if (fl->fl_type == F_RDLCK) - continue; - if ((fl->fl_end < start) || (fl->fl_start > (start + len))) - continue; - } else if (IS_FLOCK(fl)) { - if (!(fl->fl_type & LOCK_MAND)) - continue; - if (fl->fl_type & LOCK_READ) - continue; - } else - continue; - result = 0; - break; - } - spin_unlock(&inode->i_lock); - return result; -} - -EXPORT_SYMBOL(lock_may_read); - -/** - * lock_may_write - checks that the region is free of locks - * @inode: the inode that is being written - * @start: the first byte to write - * @len: the number of bytes to write - * - * Emulates Windows locking requirements. Whole-file - * mandatory locks (share modes) can prohibit a write and - * byte-range POSIX locks can prohibit a write if they overlap. - * - * N.B. this function is only ever called - * from knfsd and ownership of locks is never checked. 
- */ -int lock_may_write(struct inode *inode, loff_t start, unsigned long len) -{ - struct file_lock *fl; - int result = 1; - - spin_lock(&inode->i_lock); - for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { - if (IS_POSIX(fl)) { - if ((fl->fl_end < start) || (fl->fl_start > (start + len))) - continue; - } else if (IS_FLOCK(fl)) { - if (!(fl->fl_type & LOCK_MAND)) - continue; - if (fl->fl_type & LOCK_WRITE) - continue; - } else - continue; - result = 0; - break; - } - spin_unlock(&inode->i_lock); - return result; -} - -EXPORT_SYMBOL(lock_may_write); - static int __init filelock_init(void) { int i; diff --git a/include/linux/fs.h b/include/linux/fs.h index 3b07ce2698de..458f733c96bd 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -985,8 +985,6 @@ extern void lease_get_mtime(struct inode *, struct timespec *time); extern int generic_setlease(struct file *, long, struct file_lock **); extern int vfs_setlease(struct file *, long, struct file_lock **); extern int lease_modify(struct file_lock **, int); -extern int lock_may_read(struct inode *, loff_t start, unsigned long count); -extern int lock_may_write(struct inode *, loff_t start, unsigned long count); #else /* !CONFIG_FILE_LOCKING */ static inline int fcntl_getlk(struct file *file, unsigned int cmd, struct flock __user *user) @@ -1117,18 +1115,6 @@ static inline int lease_modify(struct file_lock **before, int arg) { return -EINVAL; } - -static inline int lock_may_read(struct inode *inode, loff_t start, - unsigned long len) -{ - return 1; -} - -static inline int lock_may_write(struct inode *inode, loff_t start, - unsigned long len) -{ - return 1; -} #endif /* !CONFIG_FILE_LOCKING */ -- cgit v1.2.3 From 1c994a0909a556508c2cc26ab5d9e13c5ce33aa0 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Wed, 27 Aug 2014 06:49:41 -0400 Subject: locks: consolidate "nolease" routines GFS2 and NFS have setlease routines that always just return -EINVAL. Turn that into a generic routine that can live in fs/libfs.c. Cc: Cc: Steven Whitehouse Cc: Signed-off-by: Jeff Layton Acked-by: Trond Myklebust Reviewed-by: Christoph Hellwig --- fs/gfs2/file.c | 22 +--------------------- fs/libfs.c | 16 ++++++++++++++++ fs/nfs/file.c | 13 +------------ fs/nfs/internal.h | 1 - fs/nfs/nfs4file.c | 2 +- include/linux/fs.h | 1 + 6 files changed, 20 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c index 26b3f952e6b1..2c02478a86b0 100644 --- a/fs/gfs2/file.c +++ b/fs/gfs2/file.c @@ -912,26 +912,6 @@ out_uninit: #ifdef CONFIG_GFS2_FS_LOCKING_DLM -/** - * gfs2_setlease - acquire/release a file lease - * @file: the file pointer - * @arg: lease type - * @fl: file lock - * - * We don't currently have a way to enforce a lease across the whole - * cluster; until we do, disable leases (by just returning -EINVAL), - * unless the administrator has requested purely local locking. 
- * - * Locking: called under i_lock - * - * Returns: errno - */ - -static int gfs2_setlease(struct file *file, long arg, struct file_lock **fl) -{ - return -EINVAL; -} - /** * gfs2_lock - acquire/release a posix lock on a file * @file: the file pointer @@ -1069,7 +1049,7 @@ const struct file_operations gfs2_file_fops = { .flock = gfs2_flock, .splice_read = generic_file_splice_read, .splice_write = iter_file_splice_write, - .setlease = gfs2_setlease, + .setlease = simple_nosetlease, .fallocate = gfs2_fallocate, }; diff --git a/fs/libfs.c b/fs/libfs.c index 88e3e00e2eca..29012a303ef8 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -1075,3 +1075,19 @@ struct inode *alloc_anon_inode(struct super_block *s) return inode; } EXPORT_SYMBOL(alloc_anon_inode); + +/** + * simple_nosetlease - generic helper for prohibiting leases + * @filp: file pointer + * @arg: type of lease to obtain + * @flp: new lease supplied for insertion + * + * Generic helper for filesystems that do not wish to allow leases to be set. + * All arguments are ignored and it just returns -EINVAL. + */ +int +simple_nosetlease(struct file *filp, long arg, struct file_lock **flp) +{ + return -EINVAL; +} +EXPORT_SYMBOL(simple_nosetlease); diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 524dd80d1898..8c4048ecdad1 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -891,17 +891,6 @@ int nfs_flock(struct file *filp, int cmd, struct file_lock *fl) } EXPORT_SYMBOL_GPL(nfs_flock); -/* - * There is no protocol support for leases, so we have no way to implement - * them correctly in the face of opens by other clients. - */ -int nfs_setlease(struct file *file, long arg, struct file_lock **fl) -{ - dprintk("NFS: setlease(%pD2, arg=%ld)\n", file, arg); - return -EINVAL; -} -EXPORT_SYMBOL_GPL(nfs_setlease); - const struct file_operations nfs_file_operations = { .llseek = nfs_file_llseek, .read = new_sync_read, @@ -918,6 +907,6 @@ const struct file_operations nfs_file_operations = { .splice_read = nfs_file_splice_read, .splice_write = iter_file_splice_write, .check_flags = nfs_check_flags, - .setlease = nfs_setlease, + .setlease = simple_nosetlease, }; EXPORT_SYMBOL_GPL(nfs_file_operations); diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 9056622d2230..94d922ebb5ac 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -346,7 +346,6 @@ int nfs_file_release(struct inode *, struct file *); int nfs_lock(struct file *, int, struct file_lock *); int nfs_flock(struct file *, int, struct file_lock *); int nfs_check_flags(int); -int nfs_setlease(struct file *, long, struct file_lock **); /* inode.c */ extern struct workqueue_struct *nfsiod_workqueue; diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c index a816f0627a6c..3e987ad9ae25 100644 --- a/fs/nfs/nfs4file.c +++ b/fs/nfs/nfs4file.c @@ -131,5 +131,5 @@ const struct file_operations nfs4_file_operations = { .splice_read = nfs_file_splice_read, .splice_write = iter_file_splice_write, .check_flags = nfs_check_flags, - .setlease = nfs_setlease, + .setlease = simple_nosetlease, }; diff --git a/include/linux/fs.h b/include/linux/fs.h index 458f733c96bd..435e3d9ec5cf 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2599,6 +2599,7 @@ extern int simple_write_end(struct file *file, struct address_space *mapping, struct page *page, void *fsdata); extern int always_delete_dentry(const struct dentry *); extern struct inode *alloc_anon_inode(struct super_block *); +extern int simple_nosetlease(struct file *, long, struct file_lock **); extern const struct dentry_operations 
simple_dentry_operations; extern struct dentry *simple_lookup(struct inode *, struct dentry *, unsigned int flags); -- cgit v1.2.3 From e0b93eddfe17dcb7d644eb5d6ad02a86fc41a977 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 22 Aug 2014 11:27:32 -0400 Subject: security: make security_file_set_fowner, f_setown and __f_setown void return security_file_set_fowner always returns 0, so make it f_setown and __f_setown void return functions and fix up the error handling in the callers. Cc: linux-security-module@vger.kernel.org Signed-off-by: Jeff Layton Reviewed-by: Christoph Hellwig --- drivers/net/tun.c | 4 +--- drivers/tty/tty_io.c | 3 ++- fs/fcntl.c | 21 +++++++-------------- fs/locks.c | 2 +- fs/notify/dnotify/dnotify.c | 8 +------- include/linux/fs.h | 4 ++-- include/linux/security.h | 8 ++++---- net/socket.c | 3 ++- security/capability.c | 4 ++-- security/security.c | 4 ++-- security/selinux/hooks.c | 4 +--- security/smack/smack_lsm.c | 3 +-- 12 files changed, 26 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index acaaf6784179..186ce541c657 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -2152,9 +2152,7 @@ static int tun_chr_fasync(int fd, struct file *file, int on) goto out; if (on) { - ret = __f_setown(file, task_pid(current), PIDTYPE_PID, 0); - if (ret) - goto out; + __f_setown(file, task_pid(current), PIDTYPE_PID, 0); tfile->flags |= TUN_FASYNC; } else tfile->flags &= ~TUN_FASYNC; diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 8fbad3410c75..aea3b66f7bf2 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -2163,8 +2163,9 @@ static int __tty_fasync(int fd, struct file *filp, int on) } get_pid(pid); spin_unlock_irqrestore(&tty->ctrl_lock, flags); - retval = __f_setown(filp, pid, type, 0); + __f_setown(filp, pid, type, 0); put_pid(pid); + retval = 0; } out: return retval; diff --git a/fs/fcntl.c b/fs/fcntl.c index 22d1c3df61ac..99d440a4a6ba 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -98,26 +98,19 @@ static void f_modown(struct file *filp, struct pid *pid, enum pid_type type, write_unlock_irq(&filp->f_owner.lock); } -int __f_setown(struct file *filp, struct pid *pid, enum pid_type type, +void __f_setown(struct file *filp, struct pid *pid, enum pid_type type, int force) { - int err; - - err = security_file_set_fowner(filp); - if (err) - return err; - + security_file_set_fowner(filp); f_modown(filp, pid, type, force); - return 0; } EXPORT_SYMBOL(__f_setown); -int f_setown(struct file *filp, unsigned long arg, int force) +void f_setown(struct file *filp, unsigned long arg, int force) { enum pid_type type; struct pid *pid; int who = arg; - int result; type = PIDTYPE_PID; if (who < 0) { type = PIDTYPE_PGID; @@ -125,9 +118,8 @@ int f_setown(struct file *filp, unsigned long arg, int force) } rcu_read_lock(); pid = find_vpid(who); - result = __f_setown(filp, pid, type, force); + __f_setown(filp, pid, type, force); rcu_read_unlock(); - return result; } EXPORT_SYMBOL(f_setown); @@ -181,7 +173,7 @@ static int f_setown_ex(struct file *filp, unsigned long arg) if (owner.pid && !pid) ret = -ESRCH; else - ret = __f_setown(filp, pid, type, 1); + __f_setown(filp, pid, type, 1); rcu_read_unlock(); return ret; @@ -302,7 +294,8 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, force_successful_syscall_return(); break; case F_SETOWN: - err = f_setown(filp, arg, 1); + f_setown(filp, arg, 1); + err = 0; break; case F_GETOWN_EX: err = f_getown_ex(filp, arg); diff --git a/fs/locks.c 
b/fs/locks.c index 5200ffd2ba9b..f5f648e003dd 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -1776,7 +1776,7 @@ static int do_fcntl_add_lease(unsigned int fd, struct file *filp, long arg) if (!fasync_insert_entry(fd, filp, &ret->fl_fasync, new)) new = NULL; - error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); + __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); out_unlock: spin_unlock(&inode->i_lock); if (fl) diff --git a/fs/notify/dnotify/dnotify.c b/fs/notify/dnotify/dnotify.c index abc8cbcfe90e..caaaf9dfe353 100644 --- a/fs/notify/dnotify/dnotify.c +++ b/fs/notify/dnotify/dnotify.c @@ -346,13 +346,7 @@ int fcntl_dirnotify(int fd, struct file *filp, unsigned long arg) goto out; } - error = __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); - if (error) { - /* if we added, we must shoot */ - if (dn_mark == new_dn_mark) - destroy = 1; - goto out; - } + __f_setown(filp, task_pid(current), PIDTYPE_PID, 0); error = attach_dn(dn, dn_mark, id, fd, filp, mask); /* !error means that we attached the dn to the dn_mark, so don't free it */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 435e3d9ec5cf..96528f73dda4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1139,8 +1139,8 @@ extern void fasync_free(struct fasync_struct *); /* can be called from interrupts */ extern void kill_fasync(struct fasync_struct **, int, int); -extern int __f_setown(struct file *filp, struct pid *, enum pid_type, int force); -extern int f_setown(struct file *filp, unsigned long arg, int force); +extern void __f_setown(struct file *filp, struct pid *, enum pid_type, int force); +extern void f_setown(struct file *filp, unsigned long arg, int force); extern void f_delown(struct file *filp); extern pid_t f_getown(struct file *filp); extern int send_sigurg(struct fown_struct *fown); diff --git a/include/linux/security.h b/include/linux/security.h index 623f90e5f38d..b10e7af95d3b 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -1559,7 +1559,7 @@ struct security_operations { int (*file_lock) (struct file *file, unsigned int cmd); int (*file_fcntl) (struct file *file, unsigned int cmd, unsigned long arg); - int (*file_set_fowner) (struct file *file); + void (*file_set_fowner) (struct file *file); int (*file_send_sigiotask) (struct task_struct *tsk, struct fown_struct *fown, int sig); int (*file_receive) (struct file *file); @@ -1834,7 +1834,7 @@ int security_file_mprotect(struct vm_area_struct *vma, unsigned long reqprot, unsigned long prot); int security_file_lock(struct file *file, unsigned int cmd); int security_file_fcntl(struct file *file, unsigned int cmd, unsigned long arg); -int security_file_set_fowner(struct file *file); +void security_file_set_fowner(struct file *file); int security_file_send_sigiotask(struct task_struct *tsk, struct fown_struct *fown, int sig); int security_file_receive(struct file *file); @@ -2312,9 +2312,9 @@ static inline int security_file_fcntl(struct file *file, unsigned int cmd, return 0; } -static inline int security_file_set_fowner(struct file *file) +static inline void security_file_set_fowner(struct file *file) { - return 0; + return; } static inline int security_file_send_sigiotask(struct task_struct *tsk, diff --git a/net/socket.c b/net/socket.c index 95ee7d8682e7..769c9671847e 100644 --- a/net/socket.c +++ b/net/socket.c @@ -1069,7 +1069,8 @@ static long sock_ioctl(struct file *file, unsigned cmd, unsigned long arg) err = -EFAULT; if (get_user(pid, (int __user *)argp)) break; - err = f_setown(sock->file, pid, 1); + 
f_setown(sock->file, pid, 1); + err = 0; break; case FIOGETOWN: case SIOCGPGRP: diff --git a/security/capability.c b/security/capability.c index a74fde6a7468..d68c57a62bcf 100644 --- a/security/capability.c +++ b/security/capability.c @@ -343,9 +343,9 @@ static int cap_file_fcntl(struct file *file, unsigned int cmd, return 0; } -static int cap_file_set_fowner(struct file *file) +static void cap_file_set_fowner(struct file *file) { - return 0; + return; } static int cap_file_send_sigiotask(struct task_struct *tsk, diff --git a/security/security.c b/security/security.c index e41b1a8d7644..18b35c63fc0c 100644 --- a/security/security.c +++ b/security/security.c @@ -775,9 +775,9 @@ int security_file_fcntl(struct file *file, unsigned int cmd, unsigned long arg) return security_ops->file_fcntl(file, cmd, arg); } -int security_file_set_fowner(struct file *file) +void security_file_set_fowner(struct file *file) { - return security_ops->file_set_fowner(file); + security_ops->file_set_fowner(file); } int security_file_send_sigiotask(struct task_struct *tsk, diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index b0e940497e23..ada0d0bf3463 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3346,14 +3346,12 @@ static int selinux_file_fcntl(struct file *file, unsigned int cmd, return err; } -static int selinux_file_set_fowner(struct file *file) +static void selinux_file_set_fowner(struct file *file) { struct file_security_struct *fsec; fsec = file->f_security; fsec->fown_sid = current_sid(); - - return 0; } static int selinux_file_send_sigiotask(struct task_struct *tsk, diff --git a/security/smack/smack_lsm.c b/security/smack/smack_lsm.c index e6ab307ce86e..69e5635d89e5 100644 --- a/security/smack/smack_lsm.c +++ b/security/smack/smack_lsm.c @@ -1390,12 +1390,11 @@ static int smack_mmap_file(struct file *file, * Returns 0 * Further research may be required on this one. */ -static int smack_file_set_fowner(struct file *file) +static void smack_file_set_fowner(struct file *file) { struct smack_known *skp = smk_of_current(); file->f_security = skp->smk_known; - return 0; } /** -- cgit v1.2.3 From 2ae4c673e3cbd69bc2decf6d7f5961f3c7b9b38b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Tue, 2 Sep 2014 15:43:52 -0700 Subject: f2fs: retain inconsistency information to initiate fsck.f2fs This patch adds sbi->need_fsck to conduct fsck.f2fs later. This flag can only be removed by fsck.f2fs. 
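As a sketch of the intended flow (illustrative only; the helper name f2fs_mark_need_fsck is hypothetical and not part of this patch), a consistency-check site would simply latch the flag and let the next checkpoint persist it:

    /* Hypothetical helper: latch the flag; do_checkpoint() then
     * persists it as CP_FSCK_FLAG in the checkpoint pack. */
    static inline void f2fs_mark_need_fsck(struct f2fs_sb_info *sbi)
    {
    	sbi->need_fsck = true;
    }

Once CP_FSCK_FLAG is on disk, fsck.f2fs can see that the image needs repair, and only fsck.f2fs clears the flag again.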
Signed-off-by: Jaegeuk Kim --- fs/f2fs/checkpoint.c | 3 +++ fs/f2fs/f2fs.h | 1 + fs/f2fs/super.c | 1 + include/linux/f2fs_fs.h | 1 + 4 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c index cb5cb4ca1814..5af97d99106e 100644 --- a/fs/f2fs/checkpoint.c +++ b/fs/f2fs/checkpoint.c @@ -882,6 +882,9 @@ static void do_checkpoint(struct f2fs_sb_info *sbi, bool is_umount) else clear_ckpt_flags(ckpt, CP_ORPHAN_PRESENT_FLAG); + if (sbi->need_fsck) + set_ckpt_flags(ckpt, CP_FSCK_FLAG); + /* update SIT/NAT bitmap */ get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP)); get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP)); diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 222ff5b4d1e1..210c62df08c3 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -434,6 +434,7 @@ struct f2fs_sb_info { struct buffer_head *raw_super_buf; /* buffer head of raw sb */ struct f2fs_super_block *raw_super; /* raw super block pointer */ int s_dirty; /* dirty flag for checkpoint */ + bool need_fsck; /* need fsck.f2fs to fix */ /* for node-related operations */ struct f2fs_nm_info *nm_info; /* node manager */ diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c index 41bdf511003d..a6923041f41a 100644 --- a/fs/f2fs/super.c +++ b/fs/f2fs/super.c @@ -849,6 +849,7 @@ static void init_sb_info(struct f2fs_sb_info *sbi) atomic_set(&sbi->nr_pages[i], 0); sbi->dir_level = DEF_DIR_LEVEL; + sbi->need_fsck = false; } /* diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 08ed2b0a96e6..9ca1ff3d4662 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -85,6 +85,7 @@ struct f2fs_super_block { /* * For checkpoint */ +#define CP_FSCK_FLAG 0x00000010 #define CP_ERROR_FLAG 0x00000008 #define CP_COMPACT_SUM_FLAG 0x00000004 #define CP_ORPHAN_PRESENT_FLAG 0x00000002 -- cgit v1.2.3 From 87354059881ce9315181604dc17076c535f4d744 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 22 Jul 2014 20:41:42 -0400 Subject: ftrace: Add helper function ftrace_ops_get_func() Add a helper function that returns what the mcount trampoline is to call for a given ftrace_ops. This helper will be used by arch code in the future to set up dynamic trampolines. But as this does the same tests that are performed in choosing what function to call for the default mcount trampoline, we might as well use it to clean up the existing code. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 2 ++ kernel/trace/ftrace.c | 47 +++++++++++++++++++++++++++++++++++------------ 2 files changed, 37 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index f0b0edbf55a9..ef37286547fc 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -56,6 +56,8 @@ struct ftrace_ops; typedef void (*ftrace_func_t)(unsigned long ip, unsigned long parent_ip, struct ftrace_ops *op, struct pt_regs *regs); +ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); + /* * FTRACE_OPS_FL_* bits denote the state of ftrace_ops struct and are * set in the flags member. diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 17b606362ab4..dabf734f909c 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -259,20 +259,12 @@ static void update_ftrace_function(void) * then have the mcount trampoline call the function directly.
*/ if (ftrace_ops_list == &ftrace_list_end || - (ftrace_ops_list->next == &ftrace_list_end && - !(ftrace_ops_list->flags & FTRACE_OPS_FL_DYNAMIC) && - !FTRACE_FORCE_LIST_FUNC)) { + (ftrace_ops_list->next == &ftrace_list_end)) { + /* Set the ftrace_ops that the arch callback uses */ set_function_trace_op = ftrace_ops_list; - /* - * If the func handles its own recursion, call it directly. - * Otherwise call the recursion protected function that - * will call the ftrace ops function. - */ - if (ftrace_ops_list->flags & FTRACE_OPS_FL_RECURSION_SAFE) - func = ftrace_ops_list->func; - else - func = ftrace_ops_recurs_func; + + func = ftrace_ops_get_func(ftrace_ops_list); } else { /* Just use the default ftrace_ops */ set_function_trace_op = &ftrace_list_end; @@ -4856,6 +4848,37 @@ static void ftrace_ops_recurs_func(unsigned long ip, unsigned long parent_ip, trace_clear_recursion(bit); } +/** + * ftrace_ops_get_func - get the function a trampoline should call + * @ops: the ops to get the function for + * + * Normally the mcount trampoline will call the ops->func, but there + * are times that it should not. For example, if the ops does not + * have its own recursion protection, then it should call the + * ftrace_ops_recurs_func() instead. + * + * Returns the function that the trampoline should call for @ops. + */ +ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops) +{ + /* + * If this is a dynamic ops or we force list func, + * then it needs to call the list anyway. + */ + if (ops->flags & FTRACE_OPS_FL_DYNAMIC || FTRACE_FORCE_LIST_FUNC) + return ftrace_ops_list_func; + + /* + * If the func handles its own recursion, call it directly. + * Otherwise call the recursion protected function that + * will call the ftrace ops function. + */ + if (!(ops->flags & FTRACE_OPS_FL_RECURSION_SAFE)) + return ftrace_ops_recurs_func; + + return ops->func; +} + static void clear_ftrace_swapper(void) { struct task_struct *p; -- cgit v1.2.3 From 738cbe72adc5c8f2016c4c68aa5162631d4f27e1 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 8 Sep 2014 08:04:47 +0200 Subject: net: bpf: consolidate JIT binary allocator Introduced in commit 314beb9bcabf ("x86: bpf_jit_comp: secure bpf jit against spraying attacks") and later on replicated in aa2d2c73c21f ("s390/bpf,jit: address randomize and write protect jit code") for s390 architecture, write protection for BPF JIT images was added, along with a random start address for the JIT code so that it does not sit on a page boundary anymore. Since both use a very similar allocator for the BPF binary header, we can consolidate this code into the BPF core as it's mostly JIT independent anyway. This will also allow future archs that support DEBUG_SET_MODULE_RONX to simply reuse it instead of reimplementing it. JIT tested on x86_64 and s390x with BPF test suite. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Cc: Eric Dumazet Cc: Heiko Carstens Cc: Martin Schwidefsky Signed-off-by: David S.
Miller --- arch/s390/net/bpf_jit_comp.c | 45 ++++++++------------------------------- arch/x86/net/bpf_jit_comp.c | 50 ++++++++++---------------------------------- include/linux/filter.h | 13 ++++++++++++ kernel/bpf/core.c | 39 ++++++++++++++++++++++++++++++++++ 4 files changed, 72 insertions(+), 75 deletions(-) (limited to 'include/linux') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index f2833c5b218a..b734f975c22e 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -5,11 +5,9 @@ * * Author(s): Martin Schwidefsky */ -#include #include #include #include -#include #include #include #include @@ -148,6 +146,12 @@ struct bpf_jit { ret; \ }) +static void bpf_jit_fill_hole(void *area, unsigned int size) +{ + /* Fill whole space with illegal instructions */ + memset(area, 0, size); +} + static void bpf_jit_prologue(struct bpf_jit *jit) { /* Save registers and create stack frame if necessary */ @@ -780,38 +784,6 @@ out: return -1; } -/* - * Note: for security reasons, bpf code will follow a randomly - * sized amount of illegal instructions. - */ -struct bpf_binary_header { - unsigned int pages; - u8 image[]; -}; - -static struct bpf_binary_header *bpf_alloc_binary(unsigned int bpfsize, - u8 **image_ptr) -{ - struct bpf_binary_header *header; - unsigned int sz, hole; - - /* Most BPF filters are really small, but if some of them fill a page, - * allow at least 128 extra bytes for illegal instructions. - */ - sz = round_up(bpfsize + sizeof(*header) + 128, PAGE_SIZE); - header = module_alloc(sz); - if (!header) - return NULL; - memset(header, 0, sz); - header->pages = sz / PAGE_SIZE; - hole = min(sz - (bpfsize + sizeof(*header)), PAGE_SIZE - sizeof(*header)); - /* Insert random number of illegal instructions before BPF code - * and make sure the first instruction starts at an even address. - */ - *image_ptr = &header->image[(prandom_u32() % hole) & -2]; - return header; -} - void bpf_jit_compile(struct bpf_prog *fp) { struct bpf_binary_header *header = NULL; @@ -850,7 +822,8 @@ void bpf_jit_compile(struct bpf_prog *fp) size = prg_len + lit_len; if (size >= BPF_SIZE_MAX) goto out; - header = bpf_alloc_binary(size, &jit.start); + header = bpf_jit_binary_alloc(size, &jit.start, + 2, bpf_jit_fill_hole); if (!header) goto out; jit.prg = jit.mid = jit.start + prg_len; @@ -884,7 +857,7 @@ void bpf_jit_free(struct bpf_prog *fp) goto free_filter; set_memory_rw(addr, header->pages); - module_free(NULL, header); + bpf_jit_binary_free(header); free_filter: bpf_prog_unlock_free(fp); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 06f8c17f5484..9de0b5476b0c 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -8,12 +8,10 @@ * as published by the Free Software Foundation; version 2 * of the License. */ -#include -#include #include #include #include -#include +#include int bpf_jit_enable __read_mostly; @@ -109,39 +107,6 @@ static inline void bpf_flush_icache(void *start, void *end) #define CHOOSE_LOAD_FUNC(K, func) \ ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? 
func##_negative_offset : func) : func##_positive_offset) -struct bpf_binary_header { - unsigned int pages; - /* Note : for security reasons, bpf code will follow a randomly - * sized amount of int3 instructions - */ - u8 image[]; -}; - -static struct bpf_binary_header *bpf_alloc_binary(unsigned int proglen, - u8 **image_ptr) -{ - unsigned int sz, hole; - struct bpf_binary_header *header; - - /* Most of BPF filters are really small, - * but if some of them fill a page, allow at least - * 128 extra bytes to insert a random section of int3 - */ - sz = round_up(proglen + sizeof(*header) + 128, PAGE_SIZE); - header = module_alloc(sz); - if (!header) - return NULL; - - memset(header, 0xcc, sz); /* fill whole space with int3 instructions */ - - header->pages = sz / PAGE_SIZE; - hole = min(sz - (proglen + sizeof(*header)), PAGE_SIZE - sizeof(*header)); - - /* insert a random number of int3 instructions before BPF code */ - *image_ptr = &header->image[prandom_u32() % hole]; - return header; -} - /* pick a register outside of BPF range for JIT internal work */ #define AUX_REG (MAX_BPF_REG + 1) @@ -206,6 +171,12 @@ static inline u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3); } +static void jit_fill_hole(void *area, unsigned int size) +{ + /* fill whole space with int3 instructions */ + memset(area, 0xcc, size); +} + struct jit_context { unsigned int cleanup_addr; /* epilogue code offset */ bool seen_ld_abs; @@ -959,7 +930,7 @@ void bpf_int_jit_compile(struct bpf_prog *prog) if (proglen <= 0) { image = NULL; if (header) - module_free(NULL, header); + bpf_jit_binary_free(header); goto out; } if (image) { @@ -969,7 +940,8 @@ void bpf_int_jit_compile(struct bpf_prog *prog) break; } if (proglen == oldproglen) { - header = bpf_alloc_binary(proglen, &image); + header = bpf_jit_binary_alloc(proglen, &image, + 1, jit_fill_hole); if (!header) goto out; } @@ -998,7 +970,7 @@ void bpf_jit_free(struct bpf_prog *fp) goto free_filter; set_memory_rw(addr, header->pages); - module_free(NULL, header); + bpf_jit_binary_free(header); free_filter: bpf_prog_unlock_free(fp); diff --git a/include/linux/filter.h b/include/linux/filter.h index 8f82ef3f1cdd..868764fcffb8 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -289,6 +289,11 @@ struct sock_fprog_kern { struct sock_filter *filter; }; +struct bpf_binary_header { + unsigned int pages; + u8 image[]; +}; + struct bpf_work_struct { struct bpf_prog *prog; struct work_struct work; @@ -358,6 +363,14 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, gfp_t gfp_extra_flags); void __bpf_prog_free(struct bpf_prog *fp); +typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); + +struct bpf_binary_header * +bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, + unsigned int alignment, + bpf_jit_fill_hole_t bpf_fill_ill_insns); +void bpf_jit_binary_free(struct bpf_binary_header *hdr); + static inline void bpf_prog_unlock_free(struct bpf_prog *fp) { bpf_prog_unlock_ro(fp); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 2c2bfaacce66..8ee520f0ec70 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -20,9 +20,12 @@ * Andi Kleen - Fix a few bad bugs and races. 
* Kris Katterjohn - Added many additional checks in bpf_check_classic() */ + #include #include #include +#include +#include #include /* Registers */ @@ -125,6 +128,42 @@ void __bpf_prog_free(struct bpf_prog *fp) } EXPORT_SYMBOL_GPL(__bpf_prog_free); +struct bpf_binary_header * +bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, + unsigned int alignment, + bpf_jit_fill_hole_t bpf_fill_ill_insns) +{ + struct bpf_binary_header *hdr; + unsigned int size, hole, start; + + /* Most of BPF filters are really small, but if some of them + * fill a page, allow at least 128 extra bytes to insert a + * random section of illegal instructions. + */ + size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE); + hdr = module_alloc(size); + if (hdr == NULL) + return NULL; + + /* Fill space with illegal/arch-dep instructions. */ + bpf_fill_ill_insns(hdr, size); + + hdr->pages = size / PAGE_SIZE; + hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)), + PAGE_SIZE - sizeof(*hdr)); + start = (prandom_u32() % hole) & ~(alignment - 1); + + /* Leave a random number of instructions before BPF code. */ + *image_ptr = &hdr->image[start]; + + return hdr; +} + +void bpf_jit_binary_free(struct bpf_binary_header *hdr) +{ + module_free(NULL, hdr); +} + /* Base function for offset calculation. Needs to go into .text section, * therefore keeping it non-static as well; will also be used by JITs * anyway later on, so do not let the compiler omit it. -- cgit v1.2.3 From 286aad3c4014ca825c447e07e24f8929e6d266d2 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 8 Sep 2014 08:04:49 +0200 Subject: net: bpf: be friendly to kmemcheck Reported by Mikulas Patocka, kmemcheck currently barks out a false positive since we don't have special kmemcheck annotation for bitfields used in bpf_prog structure. We currently have jited:1, len:31 and thus when accessing len while CONFIG_KMEMCHECK enabled, kmemcheck throws a warning that we're reading uninitialized memory. As we don't need the whole bit universe for pages member, we can just split it to u16 and use a bool flag for jited instead of a bitfield. Signed-off-by: Mikulas Patocka Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- arch/arm/net/bpf_jit_32.c | 2 +- arch/mips/net/bpf_jit.c | 2 +- arch/powerpc/net/bpf_jit_comp.c | 2 +- arch/s390/net/bpf_jit_comp.c | 2 +- arch/sparc/net/bpf_jit_comp.c | 2 +- arch/x86/net/bpf_jit_comp.c | 2 +- include/linux/filter.h | 6 +++--- net/core/filter.c | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 2d1a5b93d91c..6b45f649eff0 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -933,7 +933,7 @@ void bpf_jit_compile(struct bpf_prog *fp) set_memory_ro((unsigned long)header, header->pages); fp->bpf_func = (void *)ctx.target; - fp->jited = 1; + fp->jited = true; out: kfree(ctx.offsets); return; diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index cfa83cf2447d..0e97ccd29fe3 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -1417,7 +1417,7 @@ void bpf_jit_compile(struct bpf_prog *fp) bpf_jit_dump(fp->len, alloc_size, 2, ctx.target); fp->bpf_func = (void *)ctx.target; - fp->jited = 1; + fp->jited = true; out: kfree(ctx.offsets); diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 40c53ff59124..cbae2dfd053c 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -686,7 +686,7 @@ void bpf_jit_compile(struct bpf_prog *fp) ((u64 *)image)[0] = (u64)code_base; ((u64 *)image)[1] = local_paca->kernel_toc; fp->bpf_func = (void *)image; - fp->jited = 1; + fp->jited = true; } out: kfree(addrs); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index b734f975c22e..555f5c7e83ab 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -842,7 +842,7 @@ void bpf_jit_compile(struct bpf_prog *fp) if (jit.start) { set_memory_ro((unsigned long)header, header->pages); fp->bpf_func = (void *) jit.start; - fp->jited = 1; + fp->jited = true; } out: kfree(addrs); diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index f7a736b645e8..b2ad9dc5425e 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -801,7 +801,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf]; if (image) { bpf_flush_icache(image, image + proglen); fp->bpf_func = (void *)image; - fp->jited = 1; + fp->jited = true; } out: kfree(addrs); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 9de0b5476b0c..d56cd1f515bd 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -955,7 +955,7 @@ void bpf_int_jit_compile(struct bpf_prog *prog) bpf_flush_icache(header, image + proglen); set_memory_ro((unsigned long)header, header->pages); prog->bpf_func = (void *)image; - prog->jited = 1; + prog->jited = true; } out: kfree(addrs); diff --git a/include/linux/filter.h b/include/linux/filter.h index 868764fcffb8..4b59edead908 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -300,9 +300,9 @@ struct bpf_work_struct { }; struct bpf_prog { - u32 pages; /* Number of allocated pages */ - u32 jited:1, /* Is our filter JIT'ed? */ - len:31; /* Number of filter blocks */ + u16 pages; /* Number of allocated pages */ + bool jited; /* Is our filter JIT'ed? 
*/ + u32 len; /* Number of filter blocks */ struct sock_fprog_kern *orig_prog; /* Original BPF program */ struct bpf_work_struct *work; /* Deferred free work struct */ unsigned int (*bpf_func)(const struct sk_buff *skb, diff --git a/net/core/filter.c b/net/core/filter.c index fa5b7d0f77ac..dfc716ffa44b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -972,7 +972,7 @@ static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp) int err; fp->bpf_func = NULL; - fp->jited = 0; + fp->jited = false; err = bpf_check_classic(fp->insns, fp->len); if (err) { -- cgit v1.2.3 From a0c7b164ad115ec0556dc0904ee2218cbc5cedfa Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Tue, 9 Sep 2014 23:13:57 +0100 Subject: regulator: of: Provide simplified DT parsing method Currently, regulator drivers which support DT all repeat very similar code: they supply a list of known regulator identifiers to be matched with DT and convert that to platform data, which is then matched up with the regulators as they are registered. This is both fiddly to get right and, for devices which can use the standard helpers to provide their operations, the main source of code in the driver. Since this code is essentially identical for most drivers we can factor it out into the core, moving the identifiers in the match table into the regulator descriptors and also allowing drivers to pass in the name of the subnode to search. When a driver provides an of_match string for the regulator the core will attempt to use that to obtain init_data, allowing the driver to remove all explicit code for DT parsing and simply provide data instead. The current code leaks the phandles for the child nodes; this will be addressed incrementally, and it makes no practical difference for FDT anyway as the DT data structures are never freed.
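To illustrate the simplified method (a hedged sketch; the driver, ops and node names below are hypothetical, not taken from this patch), a driver now only has to describe where its regulators live in DT:

    /* Sketch: a descriptor carrying the new DT lookup hints */
    static const struct regulator_desc example_ldo_desc = {
    	.name            = "example-ldo1",
    	.of_match        = "ldo1",        /* matched against the child node name
    	                                     or its "regulator-compatible" property */
    	.regulators_node = "regulators",  /* subnode of the device node to search */
    	.id              = 0,
    	.type            = REGULATOR_VOLTAGE,
    	.owner           = THIS_MODULE,
    	.ops             = &example_ldo_ops,  /* hypothetical helper-based ops */
    };

with a matching DT fragment:

    pmic {
    	regulators {
    		ldo1 {
    			regulator-min-microvolt = <1800000>;
    			regulator-max-microvolt = <3300000>;
    		};
    	};
    };

regulator_register() can then use regulator_of_get_init_data() to find the "ldo1" child of the "regulators" subnode and parse its constraints into init_data, so the driver itself carries no DT parsing code.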
Signed-off-by: Mark Brown --- drivers/regulator/core.c | 10 +++++--- drivers/regulator/internal.h | 4 ++++ drivers/regulator/of_regulator.c | 51 ++++++++++++++++++++++++++++++++++++++++ include/linux/regulator/driver.h | 4 ++++ 4 files changed, 66 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/drivers/regulator/core.c b/drivers/regulator/core.c index a3c3785901f5..496002efd961 100644 --- a/drivers/regulator/core.c +++ b/drivers/regulator/core.c @@ -3516,12 +3516,17 @@ regulator_register(const struct regulator_desc *regulator_desc, return ERR_PTR(-EINVAL); } - init_data = config->init_data; - rdev = kzalloc(sizeof(struct regulator_dev), GFP_KERNEL); if (rdev == NULL) return ERR_PTR(-ENOMEM); + init_data = regulator_of_get_init_data(dev, regulator_desc, + &rdev->dev.of_node); + if (!init_data) { + init_data = config->init_data; + rdev->dev.of_node = of_node_get(config->of_node); + } + mutex_lock(®ulator_list_mutex); mutex_init(&rdev->mutex); @@ -3548,7 +3553,6 @@ regulator_register(const struct regulator_desc *regulator_desc, /* register with sysfs */ rdev->dev.class = ®ulator_class; - rdev->dev.of_node = of_node_get(config->of_node); rdev->dev.parent = dev; dev_set_name(&rdev->dev, "regulator.%d", atomic_inc_return(®ulator_no) - 1); diff --git a/drivers/regulator/internal.h b/drivers/regulator/internal.h index 84bbda10c396..a6043ad32ead 100644 --- a/drivers/regulator/internal.h +++ b/drivers/regulator/internal.h @@ -35,4 +35,8 @@ struct regulator { struct dentry *debugfs; }; +struct regulator_init_data *regulator_of_get_init_data(struct device *dev, + const struct regulator_desc *desc, + struct device_node **node); + #endif diff --git a/drivers/regulator/of_regulator.c b/drivers/regulator/of_regulator.c index ee5e67bc8d5b..7a51814abdc5 100644 --- a/drivers/regulator/of_regulator.c +++ b/drivers/regulator/of_regulator.c @@ -14,8 +14,11 @@ #include #include #include +#include #include +#include "internal.h" + static void of_get_regulation_constraints(struct device_node *np, struct regulator_init_data **init_data) { @@ -189,3 +192,51 @@ int of_regulator_match(struct device *dev, struct device_node *node, return count; } EXPORT_SYMBOL_GPL(of_regulator_match); + +struct regulator_init_data *regulator_of_get_init_data(struct device *dev, + const struct regulator_desc *desc, + struct device_node **node) +{ + struct device_node *search, *child; + struct regulator_init_data *init_data = NULL; + const char *name; + + if (!dev->of_node || !desc->of_match) + return NULL; + + if (desc->regulators_node) + search = of_get_child_by_name(dev->of_node, + desc->regulators_node); + else + search = dev->of_node; + + if (!search) { + dev_err(dev, "Failed to find regulator container node\n"); + return NULL; + } + + for_each_child_of_node(search, child) { + name = of_get_property(child, "regulator-compatible", NULL); + if (!name) + name = child->name; + + if (strcmp(desc->of_match, name)) + continue; + + init_data = of_get_regulator_init_data(dev, child); + if (!init_data) { + dev_err(dev, + "failed to parse DT for regulator %s\n", + child->name); + break; + } + + of_node_get(child); + *node = child; + break; + } + + of_node_put(search); + + return init_data; +} diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index bbe03a1924c0..c35f5f97a147 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -203,6 +203,8 @@ enum regulator_type { * * @name: Identifying name for the regulator. 
* @supply_name: Identifying the regulator supply + * @of_match: Name used to identify regulator in DT. + * @regulators_node: Name of node containing regulator definitions in DT. * @id: Numerical identifier for the regulator. * @ops: Regulator operations table. * @irq: Interrupt number for the regulator. @@ -242,6 +244,8 @@ enum regulator_type { struct regulator_desc { const char *name; const char *supply_name; + const char *of_match; + const char *regulators_node; int id; bool continuous_voltage_range; unsigned n_voltages; -- cgit v1.2.3 From e1effa0144a1ddf5b456c388ffaf784f3c5163fd Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 5 Aug 2014 17:19:38 -0400 Subject: ftrace: Annotate the ops operation on update Add three new flags for ftrace_ops: FTRACE_OPS_FL_ADDING FTRACE_OPS_FL_REMOVING FTRACE_OPS_FL_MODIFYING These will be set for the ftrace_ops when they are first added to the function tracing, being removed from function tracing or just having their functions changed from function tracing, respectively. This will be needed to remove the tramp_hash, which can grow quite big. The tramp_hash is used to note what functions a ftrace_ops is using a trampoline for. Denoting which ftrace_ops is being modified, will allow us to use the ftrace_ops hashes themselves, which are much smaller as they have a global flag to denote if a ftrace_ops is tracing all functions, as well as a notrace hash if the ftrace_ops is tracing all but a few. The tramp_hash just creates a hash item for every function, which can go into the 10s of thousands if all functions are using the ftrace_ops trampoline. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 6 ++++++ kernel/trace/ftrace.c | 45 +++++++++++++++++++++++++++++++++++++++------ 2 files changed, 45 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ef37286547fc..d9216f6385d9 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -91,6 +91,9 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * INITIALIZED - The ftrace_ops has already been initialized (first use time * register_ftrace_function() is called, it will initialized the ops) * DELETED - The ops are being deleted, do not let them be registered again. + * ADDING - The ops is in the process of being added. + * REMOVING - The ops is in the process of being removed. + * MODIFYING - The ops is in the process of changing its filter functions. */ enum { FTRACE_OPS_FL_ENABLED = 1 << 0, @@ -102,6 +105,9 @@ enum { FTRACE_OPS_FL_STUB = 1 << 6, FTRACE_OPS_FL_INITIALIZED = 1 << 7, FTRACE_OPS_FL_DELETED = 1 << 8, + FTRACE_OPS_FL_ADDING = 1 << 9, + FTRACE_OPS_FL_REMOVING = 1 << 10, + FTRACE_OPS_FL_MODIFYING = 1 << 11, }; #ifdef CONFIG_DYNAMIC_FTRACE diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 858ac16f8492..e43c793093e5 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1057,6 +1057,12 @@ static struct pid * const ftrace_swapper_pid = &init_struct_pid; static struct ftrace_ops *removed_ops; +/* + * Set when doing a global update, like enabling all recs or disabling them. + * It is not set when just updating a single ftrace_ops. 
+ */ +static bool update_all_ops; + #ifndef CONFIG_FTRACE_MCOUNT_RECORD # error Dynamic ftrace depends on MCOUNT_RECORD #endif @@ -2366,6 +2372,13 @@ static void ftrace_run_update_code(int command) FTRACE_WARN_ON(ret); } +static void ftrace_run_modify_code(struct ftrace_ops *ops, int command) +{ + ops->flags |= FTRACE_OPS_FL_MODIFYING; + ftrace_run_update_code(command); + ops->flags &= ~FTRACE_OPS_FL_MODIFYING; +} + static ftrace_func_t saved_ftrace_func; static int ftrace_start_up; @@ -2387,6 +2400,13 @@ static void ftrace_startup_enable(int command) ftrace_run_update_code(command); } +static void ftrace_startup_all(int command) +{ + update_all_ops = true; + ftrace_startup_enable(command); + update_all_ops = false; +} + static int ftrace_startup(struct ftrace_ops *ops, int command) { int ret; @@ -2401,12 +2421,22 @@ static int ftrace_startup(struct ftrace_ops *ops, int command) ftrace_start_up++; command |= FTRACE_UPDATE_CALLS; - ops->flags |= FTRACE_OPS_FL_ENABLED; + /* + * Note that ftrace probes uses this to start up + * and modify functions it will probe. But we still + * set the ADDING flag for modification, as probes + * do not have trampolines. If they add them in the + * future, then the probes will need to distinguish + * between adding and updating probes. + */ + ops->flags |= FTRACE_OPS_FL_ENABLED | FTRACE_OPS_FL_ADDING; ftrace_hash_rec_enable(ops, 1); ftrace_startup_enable(command); + ops->flags &= ~FTRACE_OPS_FL_ADDING; + return 0; } @@ -2456,11 +2486,12 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) * If the ops uses a trampoline, then it needs to be * tested first on update. */ + ops->flags |= FTRACE_OPS_FL_REMOVING; removed_ops = ops; ftrace_run_update_code(command); - removed_ops = NULL; + ops->flags &= ~FTRACE_OPS_FL_REMOVING; /* * Dynamic ops may be freed, we must make sure that all @@ -3373,7 +3404,7 @@ static void __enable_ftrace_function_probe(void) if (ftrace_probe_registered) { /* still need to update the function call sites */ if (ftrace_enabled) - ftrace_run_update_code(FTRACE_UPDATE_CALLS); + ftrace_run_modify_code(&trace_probe_ops, FTRACE_UPDATE_CALLS); return; } @@ -3792,7 +3823,7 @@ ftrace_match_addr(struct ftrace_hash *hash, unsigned long ip, int remove) static void ftrace_ops_update_code(struct ftrace_ops *ops) { if (ops->flags & FTRACE_OPS_FL_ENABLED && ftrace_enabled) - ftrace_run_update_code(FTRACE_UPDATE_CALLS); + ftrace_run_modify_code(ops, FTRACE_UPDATE_CALLS); } static int @@ -4717,6 +4748,7 @@ core_initcall(ftrace_nodyn_init); static inline int ftrace_init_dyn_debugfs(struct dentry *d_tracer) { return 0; } static inline void ftrace_startup_enable(int command) { } +static inline void ftrace_startup_all(int command) { } /* Keep as macros so we do not need to define the commands */ # define ftrace_startup(ops, command) \ ({ \ @@ -5016,7 +5048,8 @@ static int ftrace_pid_add(int p) set_ftrace_pid_task(pid); ftrace_update_pid_func(); - ftrace_startup_enable(0); + + ftrace_startup_all(0); mutex_unlock(&ftrace_lock); return 0; @@ -5045,7 +5078,7 @@ static void ftrace_pid_reset(void) } ftrace_update_pid_func(); - ftrace_startup_enable(0); + ftrace_startup_all(0); mutex_unlock(&ftrace_lock); } -- cgit v1.2.3 From fef5aeeee9e3717e7aea991a7ae9ff6a7a2d4c85 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Thu, 24 Jul 2014 12:25:47 -0400 Subject: ftrace: Replace tramp_hash with old_*_hash to save space Allowing function callbacks to declare their own trampolines requires that each ftrace_ops that has a trampoline must have some 
sort of accounting that keeps track of which ops has a trampoline attached to a record. The easy way to solve this was to add a "tramp_hash" that created a hash entry for every function that an ops uses with a trampoline. But since we can have literally tens of thousands of functions being traced, that means we need tens of thousands of descriptors to map the ops to the function in the hash. This is quite expensive and can cause enabling and disabling the function graph tracer to take some time to start and stop. It can take up to several seconds to disable or enable all functions in the function graph tracer for this reason. The better approach, albeit more complex, is to keep track of how ops are being enabled and disabled, and use that along with the counting of the number of ops attached to records, to determine which ops has a trampoline attached to a record at enabling and disabling of tracing. To do this, the tramp_hash has been replaced with an old_filter_hash and old_notrace_hash, which get a copy of the ops filter_hash and notrace_hash respectively. The old hashes are kept until the ops has been modified or removed, and the old hashes are used with the logic of the accounting to determine the ops that have the trampoline of a record. The reason this has less of a footprint is due to the trick that an "empty" hash in the filter_hash means "all functions" and an empty hash in the notrace hash means "no functions" in the hash. This is much more efficient, doesn't have the delay, and takes up much less memory, as we do not need to map all the functions but just figure out which functions are mapped at the time it is enabled or disabled. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 2 +- kernel/trace/ftrace.c | 239 +++++++++++++++++-------------------------------- 2 files changed, 85 insertions(+), 156 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index d9216f6385d9..662697babd48 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -140,7 +140,7 @@ struct ftrace_ops { int nr_trampolines; struct ftrace_ops_hash local_hash; struct ftrace_ops_hash *func_hash; - struct ftrace_hash *tramp_hash; + struct ftrace_ops_hash old_hash; unsigned long trampoline; #endif }; diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index e43c793093e5..d325a1e76554 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -1373,6 +1373,21 @@ update: return 0; } +static bool hash_contains_ip(unsigned long ip, + struct ftrace_ops_hash *hash) +{ + /* + * The function record is a match if it exists in the filter + * hash and not in the notrace hash. Note, an emty hash is + * considered a match for the filter hash, but an empty + * notrace hash is considered not in the notrace hash. + */ + return (ftrace_hash_empty(hash->filter_hash) || + ftrace_lookup_ip(hash->filter_hash, ip)) && + (ftrace_hash_empty(hash->notrace_hash) || + !ftrace_lookup_ip(hash->notrace_hash, ip)); +} + /* * Test the hashes for this ops to see if we want to call * the ops->func or not.
@@ -1388,8 +1403,7 @@ update: static int ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) { - struct ftrace_hash *filter_hash; - struct ftrace_hash *notrace_hash; + struct ftrace_ops_hash hash; int ret; #ifdef CONFIG_DYNAMIC_FTRACE_WITH_REGS @@ -1402,13 +1416,10 @@ ftrace_ops_test(struct ftrace_ops *ops, unsigned long ip, void *regs) return 0; #endif - filter_hash = rcu_dereference_raw_notrace(ops->func_hash->filter_hash); - notrace_hash = rcu_dereference_raw_notrace(ops->func_hash->notrace_hash); + hash.filter_hash = rcu_dereference_raw_notrace(ops->func_hash->filter_hash); + hash.notrace_hash = rcu_dereference_raw_notrace(ops->func_hash->notrace_hash); - if ((ftrace_hash_empty(filter_hash) || - ftrace_lookup_ip(filter_hash, ip)) && - (ftrace_hash_empty(notrace_hash) || - !ftrace_lookup_ip(notrace_hash, ip))) + if (hash_contains_ip(ip, &hash)) ret = 1; else ret = 0; @@ -1520,46 +1531,6 @@ static bool test_rec_ops_needs_regs(struct dyn_ftrace *rec) return keep_regs; } -static void ftrace_remove_tramp(struct ftrace_ops *ops, - struct dyn_ftrace *rec) -{ - /* If TRAMP is not set, no ops should have a trampoline for this */ - if (!(rec->flags & FTRACE_FL_TRAMP)) - return; - - rec->flags &= ~FTRACE_FL_TRAMP; - - if ((!ftrace_hash_empty(ops->func_hash->filter_hash) && - !ftrace_lookup_ip(ops->func_hash->filter_hash, rec->ip)) || - ftrace_lookup_ip(ops->func_hash->notrace_hash, rec->ip)) - return; - /* - * The tramp_hash entry will be removed at time - * of update. - */ - ops->nr_trampolines--; -} - -static void ftrace_clear_tramps(struct dyn_ftrace *rec, struct ftrace_ops *ops) -{ - struct ftrace_ops *op; - - /* If TRAMP is not set, no ops should have a trampoline for this */ - if (!(rec->flags & FTRACE_FL_TRAMP)) - return; - - do_for_each_ftrace_op(op, ftrace_ops_list) { - /* - * This function is called to clear other tramps - * not the one that is being updated. - */ - if (op == ops) - continue; - if (op->nr_trampolines) - ftrace_remove_tramp(op, rec); - } while_for_each_ftrace_op(op); -} - static void __ftrace_hash_rec_update(struct ftrace_ops *ops, int filter_hash, bool inc) @@ -1648,18 +1619,16 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, * function, and the ops has a trampoline registered * for it, then we can call it directly. */ - if (ftrace_rec_count(rec) == 1 && ops->trampoline) { + if (ftrace_rec_count(rec) == 1 && ops->trampoline) rec->flags |= FTRACE_FL_TRAMP; - ops->nr_trampolines++; - } else { + else /* * If we are adding another function callback * to this function, and the previous had a * custom trampoline in use, then we need to go * back to the default trampoline. */ - ftrace_clear_tramps(rec, ops); - } + rec->flags &= ~FTRACE_FL_TRAMP; /* * If any ops wants regs saved for this function @@ -1672,9 +1641,6 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, return; rec->flags--; - if (ops->trampoline && !ftrace_rec_count(rec)) - ftrace_remove_tramp(ops, rec); - /* * If the rec had REGS enabled and the ops that is * being removed had REGS set, then see if there is @@ -1688,6 +1654,17 @@ static void __ftrace_hash_rec_update(struct ftrace_ops *ops, rec->flags &= ~FTRACE_FL_REGS; } + /* + * If the rec had TRAMP enabled, then it needs to + * be cleared. As TRAMP can only be enabled iff + * there is only a single ops attached to it. + * In otherwords, always disable it on decrementing. + * In the future, we may set it if rec count is + * decremented to one, and the ops that is left + * has a trampoline. 
+ */ + rec->flags &= ~FTRACE_FL_TRAMP; + /* * flags will be cleared in ftrace_check_record() * if rec count is zero. @@ -1910,15 +1887,14 @@ static struct ftrace_ops * ftrace_find_tramp_ops_any(struct dyn_ftrace *rec) { struct ftrace_ops *op; + unsigned long ip = rec->ip; do_for_each_ftrace_op(op, ftrace_ops_list) { if (!op->trampoline) continue; - if (ftrace_lookup_ip(op->func_hash->filter_hash, rec->ip) && - (ftrace_hash_empty(op->func_hash->notrace_hash) || - !ftrace_lookup_ip(op->func_hash->notrace_hash, rec->ip))) + if (hash_contains_ip(ip, op->func_hash)) return op; } while_for_each_ftrace_op(op); @@ -1929,18 +1905,51 @@ static struct ftrace_ops * ftrace_find_tramp_ops_curr(struct dyn_ftrace *rec) { struct ftrace_ops *op; + unsigned long ip = rec->ip; - /* Removed ops need to be tested first */ - if (removed_ops && removed_ops->tramp_hash) { - if (ftrace_lookup_ip(removed_ops->tramp_hash, rec->ip)) + /* + * Need to check removed ops first. + * If they are being removed, and this rec has a tramp, + * and this rec is in the ops list, then it would be the + * one with the tramp. + */ + if (removed_ops) { + if (hash_contains_ip(ip, &removed_ops->old_hash)) return removed_ops; } + /* + * Need to find the current trampoline for a rec. + * Now, a trampoline is only attached to a rec if there + * was a single 'ops' attached to it. But this can be called + * when we are adding another op to the rec or removing the + * current one. Thus, if the op is being added, we can + * ignore it because it hasn't attached itself to the rec + * yet. That means we just need to find the op that has a + * trampoline and is not beeing added. + */ do_for_each_ftrace_op(op, ftrace_ops_list) { - if (!op->tramp_hash) + + if (!op->trampoline) + continue; + + /* + * If the ops is being added, it hasn't gotten to + * the point to be removed from this tree yet. + */ + if (op->flags & FTRACE_OPS_FL_ADDING) continue; - if (ftrace_lookup_ip(op->tramp_hash, rec->ip)) + /* + * If the ops is not being added and has a trampoline, + * then it must be the one that we want! + */ + if (hash_contains_ip(ip, op->func_hash)) + return op; + + /* If the ops is being modified, it may be in the old hash. */ + if ((op->flags & FTRACE_OPS_FL_MODIFYING) && + hash_contains_ip(ip, &op->old_hash)) return op; } while_for_each_ftrace_op(op); @@ -1952,10 +1961,11 @@ static struct ftrace_ops * ftrace_find_tramp_ops_new(struct dyn_ftrace *rec) { struct ftrace_ops *op; + unsigned long ip = rec->ip; do_for_each_ftrace_op(op, ftrace_ops_list) { /* pass rec in as regs to have non-NULL val */ - if (ftrace_ops_test(op, rec->ip, rec)) + if (hash_contains_ip(ip, op->func_hash)) return op; } while_for_each_ftrace_op(op); @@ -2262,92 +2272,6 @@ void __weak arch_ftrace_update_code(int command) ftrace_run_stop_machine(command); } -static int ftrace_save_ops_tramp_hash(struct ftrace_ops *ops) -{ - struct ftrace_page *pg; - struct dyn_ftrace *rec; - int size, bits; - int ret; - - size = ops->nr_trampolines; - bits = 0; - /* - * Make the hash size about 1/2 the # found - */ - for (size /= 2; size; size >>= 1) - bits++; - - ops->tramp_hash = alloc_ftrace_hash(bits); - /* - * TODO: a failed allocation is going to screw up - * the accounting of what needs to be modified - * and not. For now, we kill ftrace if we fail - * to allocate here. But there are ways around this, - * but that will take a little more work. 
- */ - if (!ops->tramp_hash) - return -ENOMEM; - - do_for_each_ftrace_rec(pg, rec) { - if (ftrace_rec_count(rec) == 1 && - ftrace_ops_test(ops, rec->ip, rec)) { - - /* - * If another ops adds to a rec, the rec will - * lose its trampoline and never get it back - * until all ops are off of it. - */ - if (!(rec->flags & FTRACE_FL_TRAMP)) - continue; - - /* This record had better have a trampoline */ - if (FTRACE_WARN_ON(!(rec->flags & FTRACE_FL_TRAMP_EN))) - return -1; - - ret = add_hash_entry(ops->tramp_hash, rec->ip); - if (ret < 0) - return ret; - } - } while_for_each_ftrace_rec(); - - /* The number of recs in the hash must match nr_trampolines */ - if (FTRACE_WARN_ON(ops->tramp_hash->count != ops->nr_trampolines)) - pr_warn("count=%ld trampolines=%d\n", - ops->tramp_hash->count, - ops->nr_trampolines); - - return 0; -} - -static int ftrace_save_tramp_hashes(void) -{ - struct ftrace_ops *op; - int ret; - - /* - * Now that any trampoline is being used, we need to save the - * hashes for the ops that have them. This allows the mapping - * back from the record to the ops that has the trampoline to - * know what code is being replaced. Modifying code must always - * verify what it is changing. - */ - do_for_each_ftrace_op(op, ftrace_ops_list) { - - /* The tramp_hash is recreated each time. */ - free_ftrace_hash(op->tramp_hash); - op->tramp_hash = NULL; - - if (op->nr_trampolines) { - ret = ftrace_save_ops_tramp_hash(op); - if (ret) - return ret; - } - - } while_for_each_ftrace_op(op); - - return 0; -} - static void ftrace_run_update_code(int command) { int ret; @@ -2367,9 +2291,6 @@ static void ftrace_run_update_code(int command) ret = ftrace_arch_code_modify_post_process(); FTRACE_WARN_ON(ret); - - ret = ftrace_save_tramp_hashes(); - FTRACE_WARN_ON(ret); } static void ftrace_run_modify_code(struct ftrace_ops *ops, int command) @@ -2489,8 +2410,16 @@ static int ftrace_shutdown(struct ftrace_ops *ops, int command) ops->flags |= FTRACE_OPS_FL_REMOVING; removed_ops = ops; + /* The trampoline logic checks the old hashes */ + ops->old_hash.filter_hash = ops->func_hash->filter_hash; + ops->old_hash.notrace_hash = ops->func_hash->notrace_hash; + ftrace_run_update_code(command); + ops->old_hash.filter_hash = NULL; + ops->old_hash.notrace_hash = NULL; + + removed_ops = NULL; ops->flags &= ~FTRACE_OPS_FL_REMOVING; /* @@ -3017,7 +2946,7 @@ static int t_show(struct seq_file *m, void *v) struct ftrace_ops *ops; ops = ftrace_find_tramp_ops_any(rec); - if (ops && ops->trampoline) + if (ops) seq_printf(m, "\ttramp: %pS", (void *)ops->trampoline); else -- cgit v1.2.3 From 3a630178fd5f30c285fd7016c5340a176b625913 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 7 Aug 2014 10:52:04 -0700 Subject: tracing: generate RCU warnings even when tracepoints are disabled Dave Jones reported seeing a bug from one of my TLB tracepoints: http://lkml.kernel.org/r/20140806181801.GA4605@redhat.com I've been running these patches for months and never saw this. But, a big chunk of my testing, especially with all the debugging enabled, was in a vm where intel_idle doesn't work. On the systems where I was using intel_idle, I never had lockdep enabled and this tracepoint on at the same time. This patch ensures that whenever we have lockdep available, we do _some_ RCU activity at the site of the tracepoint, despite whether the tracepoint's condition matches or even if the tracepoint itself is completely disabled. This is a bit of a hack, but it is pretty self-contained. 
I confirmed that with this patch plus lockdep I get the same splat as Dave Jones did, but without enabling the tracepoint explicitly. Link: http://lkml.kernel.org/p/20140807175204.C257CAC5@viggo.jf.intel.com Signed-off-by: Dave Hansen Cc: Dave Hansen Cc: Dave Jones , Cc: paulmck@linux.vnet.ibm.com Cc: Ingo Molnar Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index b1293f15f592..e08e21e5f601 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -157,6 +157,12 @@ extern void syscall_unregfunc(void); * Make sure the alignment of the structure in the __tracepoints section will * not add unwanted padding between the beginning of the section and the * structure. Force alignment to the same alignment as the section start. + * + * When lockdep is enabled, we make sure to always do the RCU portions of + * the tracepoint code, regardless of whether tracing is on or we match the + * condition. This lets us find RCU issues triggered with tracepoints even + * when this tracepoint is off. This code has no purpose other than poking + * RCU a bit. */ #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ extern struct tracepoint __tracepoint_##name; \ @@ -167,6 +173,11 @@ extern void syscall_unregfunc(void); TP_PROTO(data_proto), \ TP_ARGS(data_args), \ TP_CONDITION(cond),,); \ + if (IS_ENABLED(CONFIG_LOCKDEP)) { \ + rcu_read_lock_sched_notrace(); \ + rcu_dereference_sched(__tracepoint_##name.funcs);\ + rcu_read_unlock_sched_notrace(); \ + } \ } \ __DECLARE_TRACE_RCU(name, PARAMS(proto), PARAMS(args), \ PARAMS(cond), PARAMS(data_proto), PARAMS(data_args)) \ -- cgit v1.2.3 From 378520b837cf4da769600b83690d8e825f16a611 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Thu, 7 Aug 2014 10:15:02 +0800 Subject: nfs41: add a helper function to set layoutcommit after commit Track lwb in nfs_commit_data so that we can use it to setup layoutcommit in commit_done callback. 
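The lwb ("last write byte") tracked here is simply the maximum end offset over the requests in the commit list, so the layoutcommit range can cover everything written back. A standalone sketch of the same computation (illustrative plain C, not kernel code):

    #include <stdio.h>

    struct req { long long offset; long long bytes; };

    /* Highest byte boundary written by any request in the list */
    static long long get_lwb(const struct req *reqs, int n)
    {
    	long long lwb = 0;
    	int i;

    	for (i = 0; i < n; i++)
    		if (lwb < reqs[i].offset + reqs[i].bytes)
    			lwb = reqs[i].offset + reqs[i].bytes;
    	return lwb;
    }

    int main(void)
    {
    	/* requests covering [0,4096) and [8192,12288) => lwb = 12288 */
    	struct req reqs[] = { { 0, 4096 }, { 8192, 4096 } };

    	printf("lwb = %lld\n", get_lwb(reqs, 2));
    	return 0;
    }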
Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- fs/nfs/pnfs.c | 29 +++++++++++++++++++++++++++++ fs/nfs/pnfs.h | 1 + fs/nfs/write.c | 15 +++++++++++++++ include/linux/nfs_xdr.h | 1 + 4 files changed, 46 insertions(+) (limited to 'include/linux') diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index a3851debf8a2..a6586cdee211 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1797,6 +1797,35 @@ pnfs_set_layoutcommit(struct nfs_pgio_header *hdr) } EXPORT_SYMBOL_GPL(pnfs_set_layoutcommit); +void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data) +{ + struct inode *inode = data->inode; + struct nfs_inode *nfsi = NFS_I(inode); + bool mark_as_dirty = false; + + spin_lock(&inode->i_lock); + if (!test_and_set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags)) { + mark_as_dirty = true; + dprintk("%s: Set layoutcommit for inode %lu ", + __func__, inode->i_ino); + } + if (!test_and_set_bit(NFS_LSEG_LAYOUTCOMMIT, &data->lseg->pls_flags)) { + /* references matched in nfs4_layoutcommit_release */ + pnfs_get_lseg(data->lseg); + } + if (data->lwb > nfsi->layout->plh_lwb) + nfsi->layout->plh_lwb = data->lwb; + spin_unlock(&inode->i_lock); + dprintk("%s: lseg %p end_pos %llu\n", + __func__, data->lseg, nfsi->layout->plh_lwb); + + /* if pnfs_layoutcommit_inode() runs between inode locks, the next one + * will be a noop because NFS_INO_LAYOUTCOMMIT will not be set */ + if (mark_as_dirty) + mark_inode_dirty_sync(inode); +} +EXPORT_SYMBOL_GPL(pnfs_commit_set_layoutcommit); + void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) { struct nfs_server *nfss = NFS_SERVER(data->args.inode); diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index aca3dff5dae6..79c63114ce77 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -219,6 +219,7 @@ void pnfs_roc_release(struct inode *ino); void pnfs_roc_set_barrier(struct inode *ino, u32 barrier); bool pnfs_roc_drain(struct inode *ino, u32 *barrier, struct rpc_task *task); void pnfs_set_layoutcommit(struct nfs_pgio_header *); +void pnfs_commit_set_layoutcommit(struct nfs_commit_data *data); void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data); int pnfs_layoutcommit_inode(struct inode *inode, bool sync); int _pnfs_return_layout(struct inode *); diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 175d5d073ccf..3c5638f381cd 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -1538,6 +1538,18 @@ int nfs_initiate_commit(struct rpc_clnt *clnt, struct nfs_commit_data *data, } EXPORT_SYMBOL_GPL(nfs_initiate_commit); +static loff_t nfs_get_lwb(struct list_head *head) +{ + loff_t lwb = 0; + struct nfs_page *req; + + list_for_each_entry(req, head, wb_list) + if (lwb < (req_offset(req) + req->wb_bytes)) + lwb = req_offset(req) + req->wb_bytes; + + return lwb; +} + /* * Set up the argument/result storage required for the RPC call. 
*/ @@ -1557,6 +1569,9 @@ void nfs_init_commit(struct nfs_commit_data *data, data->inode = inode; data->cred = first->wb_context->cred; data->lseg = lseg; /* reference transferred */ + /* only set lwb for pnfs commit */ + if (lseg) + data->lwb = nfs_get_lwb(&data->pages); data->mds_ops = &nfs_commit_ops; data->completion_ops = cinfo->completion_ops; data->dreq = cinfo->dreq; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 0040629894df..e563b2c976ef 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1328,6 +1328,7 @@ struct nfs_commit_data { struct pnfs_layout_segment *lseg; struct nfs_client *ds_clp; /* pNFS data server */ int ds_commit_index; + loff_t lwb; const struct rpc_call_ops *mds_ops; const struct nfs_commit_completion_ops *completion_ops; int (*commit_done_cb) (struct rpc_task *task, struct nfs_commit_data *data); -- cgit v1.2.3 From 5f919c9f10c1cf821ee5f414683214a361a1b98c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 21 Aug 2014 11:09:25 -0500 Subject: pnfs: allow splicing pre-encoded pages into the layoutcommit args Currently there is no XDR buffer space allocated for the per-layout driver layoutcommit payload, which leads to server buffer overflows in the blocklayout driver even under simple workloads. As we can't do per-layout sizes for XDR operations we'll have to splice a previously encoded list of pages into the XDR stream, similar to how we handle ACL buffers. Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/nfs/nfs4xdr.c | 18 +++++++++++++----- fs/nfs/pnfs.c | 15 +++++++++++++++ fs/nfs/pnfs.h | 4 ++-- include/linux/nfs_xdr.h | 3 +++ 4 files changed, 33 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index e13b59d8d9aa..f2cd957adb90 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -395,7 +395,10 @@ static int nfs4_stat_to_errno(int); 2 /* last byte written */ + \ 1 /* nt_timechanged (false) */ + \ 1 /* layoutupdate4 layout type */ + \ - 1 /* NULL filelayout layoutupdate4 payload */) + 1 /* layoutupdate4 opaqueue len */) + /* the actual content of layoutupdate4 should + be allocated by drivers and spliced in + using xdr_write_pages */ #define decode_layoutcommit_maxsz (op_decode_hdr_maxsz + 3) #define encode_layoutreturn_maxsz (8 + op_encode_hdr_maxsz + \ encode_stateid_maxsz + \ @@ -1990,7 +1993,7 @@ encode_layoutget(struct xdr_stream *xdr, static int encode_layoutcommit(struct xdr_stream *xdr, struct inode *inode, - const struct nfs4_layoutcommit_args *args, + struct nfs4_layoutcommit_args *args, struct compound_hdr *hdr) { __be32 *p; @@ -2011,11 +2014,16 @@ encode_layoutcommit(struct xdr_stream *xdr, *p++ = cpu_to_be32(0); /* Never send time_modify_changed */ *p++ = cpu_to_be32(NFS_SERVER(args->inode)->pnfs_curr_ld->id);/* type */ - if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit) + if (NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit) { NFS_SERVER(inode)->pnfs_curr_ld->encode_layoutcommit( NFS_I(inode)->layout, xdr, args); - else - encode_uint32(xdr, 0); /* no layout-type payload */ + } else { + encode_uint32(xdr, args->layoutupdate_len); + if (args->layoutupdate_pages) { + xdr_write_pages(xdr, args->layoutupdate_pages, 0, + args->layoutupdate_len); + } + } return 0; } diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c index 57b5728e0b8e..8827ab130ed3 100644 --- a/fs/nfs/pnfs.c +++ b/fs/nfs/pnfs.c @@ -1854,6 +1854,7 @@ void pnfs_cleanup_layoutcommit(struct nfs4_layoutcommit_data *data) int pnfs_layoutcommit_inode(struct 
inode *inode, bool sync) { + struct pnfs_layoutdriver_type *ld = NFS_SERVER(inode)->pnfs_curr_ld; struct nfs4_layoutcommit_data *data; struct nfs_inode *nfsi = NFS_I(inode); loff_t end_pos; @@ -1904,6 +1905,20 @@ pnfs_layoutcommit_inode(struct inode *inode, bool sync) data->args.lastbytewritten = end_pos - 1; data->res.server = NFS_SERVER(inode); + if (ld->prepare_layoutcommit) { + status = ld->prepare_layoutcommit(&data->args); + if (status) { + spin_lock(&inode->i_lock); + if (end_pos < nfsi->layout->plh_lwb) + nfsi->layout->plh_lwb = end_pos; + spin_unlock(&inode->i_lock); + put_rpccred(data->cred); + set_bit(NFS_INO_LAYOUTCOMMIT, &nfsi->flags); + goto clear_layoutcommitting; + } + } + + status = nfs4_proc_layoutcommit(data, sync); out: if (status) diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index 1dd8a5e96c9f..8835b5a320cc 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -128,8 +128,8 @@ struct pnfs_layoutdriver_type { const struct nfs4_layoutreturn_args *args); void (*cleanup_layoutcommit) (struct nfs4_layoutcommit_data *data); - - void (*encode_layoutcommit) (struct pnfs_layout_hdr *layoutid, + int (*prepare_layoutcommit) (struct nfs4_layoutcommit_args *args); + void (*encode_layoutcommit) (struct pnfs_layout_hdr *lo, struct xdr_stream *xdr, const struct nfs4_layoutcommit_args *args); }; diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index e563b2c976ef..f4092c6b90fb 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -279,6 +279,9 @@ struct nfs4_layoutcommit_args { __u64 lastbytewritten; struct inode *inode; const u32 *bitmask; + size_t layoutupdate_len; + struct page *layoutupdate_page; + struct page **layoutupdate_pages; }; struct nfs4_layoutcommit_res { -- cgit v1.2.3 From b954d83421d51d822c42e5ab7b65069b25ad3005 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 10 Sep 2014 15:01:02 +0200 Subject: net: bpf: only build bpf_jit_binary_{alloc, free}() when jit selected Since BPF JIT depends on the availability of module_alloc() and module_free() helpers (HAVE_BPF_JIT and MODULES), we better build that code only in case we have BPF_JIT in our config enabled, just like with other JIT code. Fixes builds for arm/marzen_defconfig and sh/rsk7269_defconfig. ==================== kernel/built-in.o: In function `bpf_jit_binary_alloc': /home/cwang/linux/kernel/bpf/core.c:144: undefined reference to `module_alloc' kernel/built-in.o: In function `bpf_jit_binary_free': /home/cwang/linux/kernel/bpf/core.c:164: undefined reference to `module_free' make: *** [vmlinux] Error 1 ==================== Reported-by: Fengguang Wu Fixes: 738cbe72adc5 ("net: bpf: consolidate JIT binary allocator") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/filter.h | 78 +++++++++++++++++++++++++------------------------- kernel/bpf/core.c | 2 ++ 2 files changed, 41 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 4b59edead908..1a0bc6d134d7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -4,12 +4,18 @@ #ifndef __LINUX_FILTER_H__ #define __LINUX_FILTER_H__ +#include + #include #include #include +#include +#include #include -#include + #include + +#include #include struct sk_buff; @@ -363,14 +369,6 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, gfp_t gfp_extra_flags); void __bpf_prog_free(struct bpf_prog *fp); -typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); - -struct bpf_binary_header * -bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, - unsigned int alignment, - bpf_jit_fill_hole_t bpf_fill_ill_insns); -void bpf_jit_binary_free(struct bpf_binary_header *hdr); - static inline void bpf_prog_unlock_free(struct bpf_prog *fp) { bpf_prog_unlock_ro(fp); @@ -393,6 +391,38 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); void bpf_int_jit_compile(struct bpf_prog *fp); +#ifdef CONFIG_BPF_JIT +typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); + +struct bpf_binary_header * +bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, + unsigned int alignment, + bpf_jit_fill_hole_t bpf_fill_ill_insns); +void bpf_jit_binary_free(struct bpf_binary_header *hdr); + +void bpf_jit_compile(struct bpf_prog *fp); +void bpf_jit_free(struct bpf_prog *fp); + +static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, + u32 pass, void *image) +{ + pr_err("flen=%u proglen=%u pass=%u image=%pK\n", + flen, proglen, pass, image); + if (image) + print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET, + 16, 1, image, proglen, false); +} +#else +static inline void bpf_jit_compile(struct bpf_prog *fp) +{ +} + +static inline void bpf_jit_free(struct bpf_prog *fp) +{ + bpf_prog_unlock_free(fp); +} +#endif /* CONFIG_BPF_JIT */ + #define BPF_ANC BIT(15) static inline u16 bpf_anc_helper(const struct sock_filter *ftest) @@ -440,36 +470,6 @@ static inline void *bpf_load_pointer(const struct sk_buff *skb, int k, return bpf_internal_load_pointer_neg_helper(skb, k, size); } -#ifdef CONFIG_BPF_JIT -#include -#include -#include - -void bpf_jit_compile(struct bpf_prog *fp); -void bpf_jit_free(struct bpf_prog *fp); - -static inline void bpf_jit_dump(unsigned int flen, unsigned int proglen, - u32 pass, void *image) -{ - pr_err("flen=%u proglen=%u pass=%u image=%pK\n", - flen, proglen, pass, image); - if (image) - print_hex_dump(KERN_ERR, "JIT code: ", DUMP_PREFIX_OFFSET, - 16, 1, image, proglen, false); -} -#else -#include - -static inline void bpf_jit_compile(struct bpf_prog *fp) -{ -} - -static inline void bpf_jit_free(struct bpf_prog *fp) -{ - bpf_prog_unlock_free(fp); -} -#endif /* CONFIG_BPF_JIT */ - static inline int bpf_tell_extensions(void) { return SKF_AD_MAX; diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 8ee520f0ec70..8b7002488251 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -128,6 +128,7 @@ void __bpf_prog_free(struct bpf_prog *fp) } EXPORT_SYMBOL_GPL(__bpf_prog_free); +#ifdef CONFIG_BPF_JIT struct bpf_binary_header * bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, unsigned int alignment, @@ -163,6 +164,7 @@ void bpf_jit_binary_free(struct 
bpf_binary_header *hdr) { module_free(NULL, hdr); } +#endif /* CONFIG_BPF_JIT */ /* Base function for offset calculation. Needs to go into .text section, * therefore keeping it non-static as well; will also be used by JITs -- cgit v1.2.3 From 6314b6796e3c070d4c8086b08dfd453a0aeac4cf Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 4 Sep 2014 23:37:49 -0700 Subject: clk: Don't hold prepare_lock across debugfs creation Rob Clark reports a lockdep splat that involves the prepare_lock chained with the mmap semaphore. ====================================================== [ INFO: possible circular locking dependency detected ] 3.17.0-rc1-00050-g07a489b #802 Tainted: G W ------------------------------------------------------- Xorg.bin/5413 is trying to acquire lock: (prepare_lock){+.+.+.}, at: [] clk_prepare_lock+0x88/0xfc but task is already holding lock: (qcom_iommu_lock){+.+...}, at: [] qcom_iommu_unmap+0x1c/0x1f0 which lock already depends on the new lock. the existing dependency chain (in reverse order) is: -> #4 (qcom_iommu_lock){+.+...}: [] qcom_iommu_map+0x28/0x450 [] iommu_map+0xc8/0x12c [] msm_iommu_map+0xb4/0x130 [] msm_gem_get_iova_locked+0x9c/0xe8 [] msm_gem_get_iova+0x4c/0x64 [] mdp4_kms_init+0x4c4/0x6c0 [] msm_load+0x2ac/0x34c [] drm_dev_register+0xac/0x108 [] drm_platform_init+0x50/0xf0 [] try_to_bring_up_master.part.3+0xc8/0x108 [] component_master_add_with_match+0xa8/0x104 [] msm_pdev_probe+0x64/0x70 [] platform_drv_probe+0x2c/0x60 [] driver_probe_device+0x108/0x234 [] bus_for_each_drv+0x64/0x98 [] device_attach+0x78/0x8c [] bus_probe_device+0x88/0xac [] deferred_probe_work_func+0x68/0x9c [] process_one_work+0x1a0/0x40c [] worker_thread+0x44/0x4d8 [] kthread+0xd8/0xec [] ret_from_fork+0x14/0x2c -> #3 (&dev->struct_mutex){+.+.+.}: [] drm_gem_mmap+0x38/0xd0 [] msm_gem_mmap+0xc/0x5c [] mmap_region+0x35c/0x6c8 [] do_mmap_pgoff+0x314/0x398 [] vm_mmap_pgoff+0x84/0xb4 [] SyS_mmap_pgoff+0x94/0xbc [] ret_fast_syscall+0x0/0x48 -> #2 (&mm->mmap_sem){++++++}: [] filldir64+0x68/0x180 [] dcache_readdir+0x188/0x22c [] iterate_dir+0x9c/0x11c [] SyS_getdents64+0x78/0xe8 [] ret_fast_syscall+0x0/0x48 -> #1 (&sb->s_type->i_mutex_key#3){+.+.+.}: [] __create_file+0x58/0x1dc [] debugfs_create_dir+0x1c/0x24 [] clk_debug_create_subtree+0x20/0x170 [] clk_debug_init+0xec/0x14c [] do_one_initcall+0x8c/0x1c8 [] kernel_init_freeable+0x13c/0x1dc [] kernel_init+0x8/0xe8 [] ret_from_fork+0x14/0x2c -> #0 (prepare_lock){+.+.+.}: [] mutex_lock_nested+0x70/0x3e8 [] clk_prepare_lock+0x88/0xfc [] clk_prepare+0xc/0x24 [] __enable_clocks.isra.4+0x18/0xa4 [] __flush_iotlb_va+0xe0/0x114 [] qcom_iommu_unmap+0xac/0x1f0 [] iommu_unmap+0x9c/0xe8 [] msm_iommu_unmap+0x64/0x84 [] msm_gem_free_object+0x11c/0x338 [] drm_gem_object_handle_unreference_unlocked+0xfc/0x130 [] drm_gem_object_release_handle+0x50/0x68 [] idr_for_each+0xa8/0xdc [] drm_gem_release+0x1c/0x28 [] drm_release+0x370/0x428 [] __fput+0x98/0x1e8 [] task_work_run+0xb0/0xfc [] do_exit+0x2ec/0x948 [] do_group_exit+0x4c/0xb8 [] get_signal+0x28c/0x6ac [] do_signal+0xc4/0x3e4 [] do_work_pending+0xb4/0xc4 [] work_pending+0xc/0x20 other info that might help us debug this: Chain exists of: prepare_lock --> &dev->struct_mutex --> qcom_iommu_lock Possible unsafe locking scenario: CPU0 CPU1 ---- ---- lock(qcom_iommu_lock); lock(&dev->struct_mutex); lock(qcom_iommu_lock); lock(prepare_lock); *** DEADLOCK *** 3 locks held by Xorg.bin/5413: #0: (drm_global_mutex){+.+.+.}, at: [] drm_release+0x34/0x428 #1: (&dev->struct_mutex){+.+.+.}, at: [] 
drm_gem_object_handle_unreference_unlocked+0xcc/0x130 #2: (qcom_iommu_lock){+.+...}, at: [] qcom_iommu_unmap+0x1c/0x1f0 stack backtrace: CPU: 1 PID: 5413 Comm: Xorg.bin Tainted: G W 3.17.0-rc1-00050-g07a489b #802 [] (unwind_backtrace) from [] (show_stack+0x10/0x14) [] (show_stack) from [] (dump_stack+0x98/0xb8) [] (dump_stack) from [] (print_circular_bug+0x218/0x340) [] (print_circular_bug) from [] (__lock_acquire+0x1d24/0x20b8) [] (__lock_acquire) from [] (lock_acquire+0x9c/0xbc) [] (lock_acquire) from [] (mutex_lock_nested+0x70/0x3e8) [] (mutex_lock_nested) from [] (clk_prepare_lock+0x88/0xfc) [] (clk_prepare_lock) from [] (clk_prepare+0xc/0x24) [] (clk_prepare) from [] (__enable_clocks.isra.4+0x18/0xa4) [] (__enable_clocks.isra.4) from [] (__flush_iotlb_va+0xe0/0x114) [] (__flush_iotlb_va) from [] (qcom_iommu_unmap+0xac/0x1f0) [] (qcom_iommu_unmap) from [] (iommu_unmap+0x9c/0xe8) [] (iommu_unmap) from [] (msm_iommu_unmap+0x64/0x84) [] (msm_iommu_unmap) from [] (msm_gem_free_object+0x11c/0x338) [] (msm_gem_free_object) from [] (drm_gem_object_handle_unreference_unlocked+0xfc/0x130) [] (drm_gem_object_handle_unreference_unlocked) from [] (drm_gem_object_release_handle+0x50/0x68) [] (drm_gem_object_release_handle) from [] (idr_for_each+0xa8/0xdc) [] (idr_for_each) from [] (drm_gem_release+0x1c/0x28) [] (drm_gem_release) from [] (drm_release+0x370/0x428) [] (drm_release) from [] (__fput+0x98/0x1e8) [] (__fput) from [] (task_work_run+0xb0/0xfc) [] (task_work_run) from [] (do_exit+0x2ec/0x948) [] (do_exit) from [] (do_group_exit+0x4c/0xb8) [] (do_group_exit) from [] (get_signal+0x28c/0x6ac) [] (get_signal) from [] (do_signal+0xc4/0x3e4) [] (do_signal) from [] (do_work_pending+0xb4/0xc4) [] (do_work_pending) from [] (work_pending+0xc/0x20) We can break this chain if we don't hold the prepare_lock while creating debugfs directories. We only hold the prepare_lock right now because we're traversing the clock tree recursively and we don't want the hierarchy to change during the traversal. Replacing this traversal with a simple linked list walk allows us to only grab a list lock instead of the prepare_lock, thus breaking the lock chain. Signed-off-by: Stephen Boyd Signed-off-by: Mike Turquette --- drivers/clk/clk.c | 73 +++++++++++++++++---------------------------- include/linux/clk-private.h | 1 + 2 files changed, 28 insertions(+), 46 deletions(-) (limited to 'include/linux') diff --git a/drivers/clk/clk.c b/drivers/clk/clk.c index b76fa69b44cb..8ca28189e4e9 100644 --- a/drivers/clk/clk.c +++ b/drivers/clk/clk.c @@ -100,6 +100,8 @@ static void clk_enable_unlock(unsigned long flags) static struct dentry *rootdir; static int inited = 0; +static DEFINE_MUTEX(clk_debug_lock); +static HLIST_HEAD(clk_debug_list); static struct hlist_head *all_lists[] = { &clk_root_list, @@ -300,28 +302,6 @@ out: return ret; } -/* caller must hold prepare_lock */ -static int clk_debug_create_subtree(struct clk *clk, struct dentry *pdentry) -{ - struct clk *child; - int ret = -EINVAL;; - - if (!clk || !pdentry) - goto out; - - ret = clk_debug_create_one(clk, pdentry); - - if (ret) - goto out; - - hlist_for_each_entry(child, &clk->children, child_node) - clk_debug_create_subtree(child, pdentry); - - ret = 0; -out: - return ret; -} - /** * clk_debug_register - add a clk node to the debugfs clk tree * @clk: the clk being added to the debugfs clk tree @@ -329,20 +309,21 @@ out: * Dynamically adds a clk to the debugfs clk tree if debugfs has been * initialized. 
Otherwise it bails out early since the debugfs clk tree * will be created lazily by clk_debug_init as part of a late_initcall. - * - * Caller must hold prepare_lock. Only clk_init calls this function (so - * far) so this is taken care. */ static int clk_debug_register(struct clk *clk) { int ret = 0; + mutex_lock(&clk_debug_lock); + hlist_add_head(&clk->debug_node, &clk_debug_list); + if (!inited) - goto out; + goto unlock; - ret = clk_debug_create_subtree(clk, rootdir); + ret = clk_debug_create_one(clk, rootdir); +unlock: + mutex_unlock(&clk_debug_lock); -out: return ret; } @@ -353,12 +334,18 @@ out: * Dynamically removes a clk and all it's children clk nodes from the * debugfs clk tree if clk->dentry points to debugfs created by * clk_debug_register in __clk_init. - * - * Caller must hold prepare_lock. */ static void clk_debug_unregister(struct clk *clk) { + mutex_lock(&clk_debug_lock); + if (!clk->dentry) + goto out; + + hlist_del_init(&clk->debug_node); debugfs_remove_recursive(clk->dentry); + clk->dentry = NULL; +out: + mutex_unlock(&clk_debug_lock); } struct dentry *clk_debugfs_add_file(struct clk *clk, char *name, umode_t mode, @@ -415,17 +402,12 @@ static int __init clk_debug_init(void) if (!d) return -ENOMEM; - clk_prepare_lock(); - - hlist_for_each_entry(clk, &clk_root_list, child_node) - clk_debug_create_subtree(clk, rootdir); - - hlist_for_each_entry(clk, &clk_orphan_list, child_node) - clk_debug_create_subtree(clk, rootdir); + mutex_lock(&clk_debug_lock); + hlist_for_each_entry(clk, &clk_debug_list, debug_node) + clk_debug_create_one(clk, rootdir); inited = 1; - - clk_prepare_unlock(); + mutex_unlock(&clk_debug_lock); return 0; } @@ -2087,14 +2069,16 @@ void clk_unregister(struct clk *clk) { unsigned long flags; - if (!clk || WARN_ON_ONCE(IS_ERR(clk))) - return; + if (!clk || WARN_ON_ONCE(IS_ERR(clk))) + return; + + clk_debug_unregister(clk); clk_prepare_lock(); if (clk->ops == &clk_nodrv_ops) { pr_err("%s: unregistered clock: %s\n", __func__, clk->name); - goto out; + return; } /* * Assign empty clock ops for consumers that might still hold @@ -2113,16 +2097,13 @@ void clk_unregister(struct clk *clk) clk_set_parent(child, NULL); } - clk_debug_unregister(clk); - hlist_del_init(&clk->child_node); if (clk->prepare_count) pr_warn("%s: unregistering prepared clock: %s\n", __func__, clk->name); - kref_put(&clk->ref, __clk_release); -out: + clk_prepare_unlock(); } EXPORT_SYMBOL_GPL(clk_unregister); diff --git a/include/linux/clk-private.h b/include/linux/clk-private.h index efbf70b9fd84..4ed34105c371 100644 --- a/include/linux/clk-private.h +++ b/include/linux/clk-private.h @@ -48,6 +48,7 @@ struct clk { unsigned long accuracy; struct hlist_head children; struct hlist_node child_node; + struct hlist_node debug_node; unsigned int notifier_count; #ifdef CONFIG_DEBUG_FS struct dentry *dentry; -- cgit v1.2.3 From 184c3fc3f52fb75800deb76deffb70907d1f76ea Mon Sep 17 00:00:00 2001 From: Mark Rustad Date: Thu, 11 Sep 2014 09:47:23 +0930 Subject: moduleparam: Resolve missing-field-initializer warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Resolve a missing-field-initializer warning, that is produced by every reference to module_param_call, by using designated initialization for the first field. That is enough to silence the complaint. The message is only seen when doing a W=2 build. I happened to be using gcc 4.8.3, but I think most versions would produce the warning when it is enabled. 
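A minimal sketch of the two initialization styles (set_fn and get_fn are placeholder names; the hunk below relies on .flags being the first member of struct kernel_param_ops):

	/* Purely positional: W=2 warns that the trailing fields,
	 * e.g. .free, have no initializers. */
	static struct kernel_param_ops ops_warns =
		{ 0, (void *)set_fn, (void *)get_fn };

	/* One designated initializer quiets the warning; C99 lets the
	 * following positional initializers continue from .set onward. */
	static struct kernel_param_ops ops_quiet =
		{ .flags = 0, (void *)set_fn, (void *)get_fn };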
It can either be silenced by using even a single designated initializer as I did here, or providing values for all of the fields. Because of the number of references to the macro, this change silences many warnings in W=2 builds. One instance of the full warning message looks like this: /home/share/git/nn-mdr/include/linux/moduleparam.h:198:16: warning: missing initializer for field ‘free’ of ‘struct kernel_param_ops’ [-Wmissing-field-initializers] static struct kernel_param_ops __param_ops_##name = \ ^ /home/share/git/nn-mdr/fs/fuse/inode.c:35:1: note: in expansion of macro ‘module_param_call’ module_param_call(max_user_bgreq, set_global_limit, param_get_uint, ^ /home/share/git/nn-mdr/include/linux/moduleparam.h:56:9: note: ‘free’ declared here void (*free)(void *arg); Signed-off-by: Mark Rustad Signed-off-by: Jeff Kirsher Signed-off-by: Rusty Russell --- include/linux/moduleparam.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 593501996574..b43f4752304e 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -224,7 +224,7 @@ struct kparam_array /* Obsolete - use module_param_cb() */ #define module_param_call(name, set, get, arg, perm) \ static struct kernel_param_ops __param_ops_##name = \ - { 0, (void *)set, (void *)get }; \ + { .flags = 0, (void *)set, (void *)get }; \ __module_param_call(MODULE_PARAM_PREFIX, \ name, &__param_ops_##name, arg, \ (perm) + sizeof(__check_old_set_param(set))*0, -1, 0) -- cgit v1.2.3 From 3d598f47e804a77208c6bb0a454123018e2f2281 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 19 Aug 2014 20:29:12 +0300 Subject: dmaengine: dw: move dw_dmac.h to where it belongs to There is a common storage for platform-data-related structures and definitions inside the kernel source tree. The patch moves the file from include/linux to include/linux/platform_data and renames it accordingly. The users are also updated.
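For each user the conversion is a mechanical one-line include change, e.g.:

	-#include <linux/dw_dmac.h>
	+#include <linux/platform_data/dma-dw.h>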
Signed-off-by: Andy Shevchenko Acked-by: Viresh Kumar [For the arch/avr32/.* and .*sound/atmel.*] Acked-by: Hans-Christian Egtvedt Signed-off-by: Vinod Koul --- MAINTAINERS | 2 +- arch/avr32/mach-at32ap/at32ap700x.c | 2 +- arch/avr32/mach-at32ap/include/mach/atmel-mci.h | 2 +- drivers/dma/dw/internal.h | 2 +- drivers/dma/dw/regs.h | 6 +- include/linux/dw_dmac.h | 111 ------------------------ include/linux/platform_data/dma-dw.h | 111 ++++++++++++++++++++++++ include/sound/atmel-abdac.h | 2 +- include/sound/atmel-ac97c.h | 2 +- sound/atmel/abdac.c | 2 +- sound/atmel/ac97c.c | 2 +- 11 files changed, 122 insertions(+), 122 deletions(-) delete mode 100644 include/linux/dw_dmac.h create mode 100644 include/linux/platform_data/dma-dw.h (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index aefa94841ff3..a10072963889 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7875,7 +7875,7 @@ SYNOPSYS DESIGNWARE DMAC DRIVER M: Viresh Kumar M: Andy Shevchenko S: Maintained -F: include/linux/dw_dmac.h +F: include/linux/platform_data/dma-dw.h F: drivers/dma/dw/ SYNOPSYS DESIGNWARE MMC/SD/SDIO DRIVER diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c index db85b5ec3351..2a1aa712c6e7 100644 --- a/arch/avr32/mach-at32ap/at32ap700x.c +++ b/arch/avr32/mach-at32ap/at32ap700x.c @@ -7,7 +7,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/arch/avr32/mach-at32ap/include/mach/atmel-mci.h b/arch/avr32/mach-at32ap/include/mach/atmel-mci.h index 4bba58561d5c..11d7f4b28dc8 100644 --- a/arch/avr32/mach-at32ap/include/mach/atmel-mci.h +++ b/arch/avr32/mach-at32ap/include/mach/atmel-mci.h @@ -1,7 +1,7 @@ #ifndef __MACH_ATMEL_MCI_H #define __MACH_ATMEL_MCI_H -#include +#include /** * struct mci_dma_data - DMA data for MCI interface diff --git a/drivers/dma/dw/internal.h b/drivers/dma/dw/internal.h index 32667f9e0dda..43cc1dfad5c9 100644 --- a/drivers/dma/dw/internal.h +++ b/drivers/dma/dw/internal.h @@ -12,7 +12,7 @@ #define _DW_DMAC_INTERNAL_H #include -#include +#include #include "regs.h" diff --git a/drivers/dma/dw/regs.h b/drivers/dma/dw/regs.h index bb98d3e91e8b..af02439155e9 100644 --- a/drivers/dma/dw/regs.h +++ b/drivers/dma/dw/regs.h @@ -11,7 +11,7 @@ #include #include -#include +#include #define DW_DMA_MAX_NR_CHANNELS 8 #define DW_DMA_MAX_NR_REQUESTS 16 @@ -161,7 +161,7 @@ struct dw_dma_regs { #define DWC_CTLH_DONE 0x00001000 #define DWC_CTLH_BLOCK_TS_MASK 0x00000fff -/* Bitfields in CFG_LO. Platform-configurable bits are in */ +/* Bitfields in CFG_LO. Platform-configurable bits are in */ #define DWC_CFGL_CH_PRIOR_MASK (0x7 << 5) /* priority mask */ #define DWC_CFGL_CH_PRIOR(x) ((x) << 5) /* priority */ #define DWC_CFGL_CH_SUSP (1 << 8) /* pause xfer */ @@ -172,7 +172,7 @@ struct dw_dma_regs { #define DWC_CFGL_RELOAD_SAR (1 << 30) #define DWC_CFGL_RELOAD_DAR (1 << 31) -/* Bitfields in CFG_HI. Platform-configurable bits are in */ +/* Bitfields in CFG_HI. 
Platform-configurable bits are in */ #define DWC_CFGH_DS_UPD_EN (1 << 5) #define DWC_CFGH_SS_UPD_EN (1 << 6) diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h deleted file mode 100644 index 68b4024184de..000000000000 --- a/include/linux/dw_dmac.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Driver for the Synopsys DesignWare DMA Controller - * - * Copyright (C) 2007 Atmel Corporation - * Copyright (C) 2010-2011 ST Microelectronics - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#ifndef DW_DMAC_H -#define DW_DMAC_H - -#include - -/** - * struct dw_dma_slave - Controller-specific information about a slave - * - * @dma_dev: required DMA master device. Depricated. - * @bus_id: name of this device channel, not just a device name since - * devices may have more than one channel e.g. "foo_tx" - * @cfg_hi: Platform-specific initializer for the CFG_HI register - * @cfg_lo: Platform-specific initializer for the CFG_LO register - * @src_master: src master for transfers on allocated channel. - * @dst_master: dest master for transfers on allocated channel. - */ -struct dw_dma_slave { - struct device *dma_dev; - u32 cfg_hi; - u32 cfg_lo; - u8 src_master; - u8 dst_master; -}; - -/** - * struct dw_dma_platform_data - Controller configuration parameters - * @nr_channels: Number of channels supported by hardware (max 8) - * @is_private: The device channels should be marked as private and not for - * by the general purpose DMA channel allocator. - * @chan_allocation_order: Allocate channels starting from 0 or 7 - * @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0. - * @block_size: Maximum block size supported by the controller - * @nr_masters: Number of AHB masters supported by the controller - * @data_width: Maximum data width supported by hardware per AHB master - * (0 - 8bits, 1 - 16bits, ..., 5 - 256bits) - */ -struct dw_dma_platform_data { - unsigned int nr_channels; - bool is_private; -#define CHAN_ALLOCATION_ASCENDING 0 /* zero to seven */ -#define CHAN_ALLOCATION_DESCENDING 1 /* seven to zero */ - unsigned char chan_allocation_order; -#define CHAN_PRIORITY_ASCENDING 0 /* chan0 highest */ -#define CHAN_PRIORITY_DESCENDING 1 /* chan7 highest */ - unsigned char chan_priority; - unsigned short block_size; - unsigned char nr_masters; - unsigned char data_width[4]; -}; - -/* bursts size */ -enum dw_dma_msize { - DW_DMA_MSIZE_1, - DW_DMA_MSIZE_4, - DW_DMA_MSIZE_8, - DW_DMA_MSIZE_16, - DW_DMA_MSIZE_32, - DW_DMA_MSIZE_64, - DW_DMA_MSIZE_128, - DW_DMA_MSIZE_256, -}; - -/* Platform-configurable bits in CFG_HI */ -#define DWC_CFGH_FCMODE (1 << 0) -#define DWC_CFGH_FIFO_MODE (1 << 1) -#define DWC_CFGH_PROTCTL(x) ((x) << 2) -#define DWC_CFGH_SRC_PER(x) ((x) << 7) -#define DWC_CFGH_DST_PER(x) ((x) << 11) - -/* Platform-configurable bits in CFG_LO */ -#define DWC_CFGL_LOCK_CH_XFER (0 << 12) /* scope of LOCK_CH */ -#define DWC_CFGL_LOCK_CH_BLOCK (1 << 12) -#define DWC_CFGL_LOCK_CH_XACT (2 << 12) -#define DWC_CFGL_LOCK_BUS_XFER (0 << 14) /* scope of LOCK_BUS */ -#define DWC_CFGL_LOCK_BUS_BLOCK (1 << 14) -#define DWC_CFGL_LOCK_BUS_XACT (2 << 14) -#define DWC_CFGL_LOCK_CH (1 << 15) /* channel lockout */ -#define DWC_CFGL_LOCK_BUS (1 << 16) /* busmaster lockout */ -#define DWC_CFGL_HS_DST_POL (1 << 18) /* dst handshake active low */ -#define DWC_CFGL_HS_SRC_POL (1 << 19) /* src handshake active low */ - -/* DMA API extensions */ -struct 
dw_cyclic_desc { - struct dw_desc **desc; - unsigned long periods; - void (*period_callback)(void *param); - void *period_callback_param; -}; - -struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan, - dma_addr_t buf_addr, size_t buf_len, size_t period_len, - enum dma_transfer_direction direction); -void dw_dma_cyclic_free(struct dma_chan *chan); -int dw_dma_cyclic_start(struct dma_chan *chan); -void dw_dma_cyclic_stop(struct dma_chan *chan); - -dma_addr_t dw_dma_get_src_addr(struct dma_chan *chan); - -dma_addr_t dw_dma_get_dst_addr(struct dma_chan *chan); - -#endif /* DW_DMAC_H */ diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h new file mode 100644 index 000000000000..68b4024184de --- /dev/null +++ b/include/linux/platform_data/dma-dw.h @@ -0,0 +1,111 @@ +/* + * Driver for the Synopsys DesignWare DMA Controller + * + * Copyright (C) 2007 Atmel Corporation + * Copyright (C) 2010-2011 ST Microelectronics + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef DW_DMAC_H +#define DW_DMAC_H + +#include + +/** + * struct dw_dma_slave - Controller-specific information about a slave + * + * @dma_dev: required DMA master device. Depricated. + * @bus_id: name of this device channel, not just a device name since + * devices may have more than one channel e.g. "foo_tx" + * @cfg_hi: Platform-specific initializer for the CFG_HI register + * @cfg_lo: Platform-specific initializer for the CFG_LO register + * @src_master: src master for transfers on allocated channel. + * @dst_master: dest master for transfers on allocated channel. + */ +struct dw_dma_slave { + struct device *dma_dev; + u32 cfg_hi; + u32 cfg_lo; + u8 src_master; + u8 dst_master; +}; + +/** + * struct dw_dma_platform_data - Controller configuration parameters + * @nr_channels: Number of channels supported by hardware (max 8) + * @is_private: The device channels should be marked as private and not for + * by the general purpose DMA channel allocator. + * @chan_allocation_order: Allocate channels starting from 0 or 7 + * @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0. 
+ * @block_size: Maximum block size supported by the controller + * @nr_masters: Number of AHB masters supported by the controller + * @data_width: Maximum data width supported by hardware per AHB master + * (0 - 8bits, 1 - 16bits, ..., 5 - 256bits) + */ +struct dw_dma_platform_data { + unsigned int nr_channels; + bool is_private; +#define CHAN_ALLOCATION_ASCENDING 0 /* zero to seven */ +#define CHAN_ALLOCATION_DESCENDING 1 /* seven to zero */ + unsigned char chan_allocation_order; +#define CHAN_PRIORITY_ASCENDING 0 /* chan0 highest */ +#define CHAN_PRIORITY_DESCENDING 1 /* chan7 highest */ + unsigned char chan_priority; + unsigned short block_size; + unsigned char nr_masters; + unsigned char data_width[4]; +}; + +/* bursts size */ +enum dw_dma_msize { + DW_DMA_MSIZE_1, + DW_DMA_MSIZE_4, + DW_DMA_MSIZE_8, + DW_DMA_MSIZE_16, + DW_DMA_MSIZE_32, + DW_DMA_MSIZE_64, + DW_DMA_MSIZE_128, + DW_DMA_MSIZE_256, +}; + +/* Platform-configurable bits in CFG_HI */ +#define DWC_CFGH_FCMODE (1 << 0) +#define DWC_CFGH_FIFO_MODE (1 << 1) +#define DWC_CFGH_PROTCTL(x) ((x) << 2) +#define DWC_CFGH_SRC_PER(x) ((x) << 7) +#define DWC_CFGH_DST_PER(x) ((x) << 11) + +/* Platform-configurable bits in CFG_LO */ +#define DWC_CFGL_LOCK_CH_XFER (0 << 12) /* scope of LOCK_CH */ +#define DWC_CFGL_LOCK_CH_BLOCK (1 << 12) +#define DWC_CFGL_LOCK_CH_XACT (2 << 12) +#define DWC_CFGL_LOCK_BUS_XFER (0 << 14) /* scope of LOCK_BUS */ +#define DWC_CFGL_LOCK_BUS_BLOCK (1 << 14) +#define DWC_CFGL_LOCK_BUS_XACT (2 << 14) +#define DWC_CFGL_LOCK_CH (1 << 15) /* channel lockout */ +#define DWC_CFGL_LOCK_BUS (1 << 16) /* busmaster lockout */ +#define DWC_CFGL_HS_DST_POL (1 << 18) /* dst handshake active low */ +#define DWC_CFGL_HS_SRC_POL (1 << 19) /* src handshake active low */ + +/* DMA API extensions */ +struct dw_cyclic_desc { + struct dw_desc **desc; + unsigned long periods; + void (*period_callback)(void *param); + void *period_callback_param; +}; + +struct dw_cyclic_desc *dw_dma_cyclic_prep(struct dma_chan *chan, + dma_addr_t buf_addr, size_t buf_len, size_t period_len, + enum dma_transfer_direction direction); +void dw_dma_cyclic_free(struct dma_chan *chan); +int dw_dma_cyclic_start(struct dma_chan *chan); +void dw_dma_cyclic_stop(struct dma_chan *chan); + +dma_addr_t dw_dma_get_src_addr(struct dma_chan *chan); + +dma_addr_t dw_dma_get_dst_addr(struct dma_chan *chan); + +#endif /* DW_DMAC_H */ diff --git a/include/sound/atmel-abdac.h b/include/sound/atmel-abdac.h index edff6a8ba1b5..a8f735d677fa 100644 --- a/include/sound/atmel-abdac.h +++ b/include/sound/atmel-abdac.h @@ -10,7 +10,7 @@ #ifndef __INCLUDE_SOUND_ATMEL_ABDAC_H #define __INCLUDE_SOUND_ATMEL_ABDAC_H -#include +#include /** * struct atmel_abdac_pdata - board specific ABDAC configuration diff --git a/include/sound/atmel-ac97c.h b/include/sound/atmel-ac97c.h index 00e6c289a936..f2a1cdc37661 100644 --- a/include/sound/atmel-ac97c.h +++ b/include/sound/atmel-ac97c.h @@ -10,7 +10,7 @@ #ifndef __INCLUDE_SOUND_ATMEL_AC97C_H #define __INCLUDE_SOUND_ATMEL_AC97C_H -#include +#include #define AC97C_CAPTURE 0x01 #define AC97C_PLAYBACK 0x02 diff --git a/sound/atmel/abdac.c b/sound/atmel/abdac.c index edf2ca72d518..154a7c44e38d 100644 --- a/sound/atmel/abdac.c +++ b/sound/atmel/abdac.c @@ -9,7 +9,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/sound/atmel/ac97c.c b/sound/atmel/ac97c.c index a04d23174dc2..1dfb35afef8f 100644 --- a/sound/atmel/ac97c.c +++ b/sound/atmel/ac97c.c @@ -31,7 +31,7 @@ #include #include -#include +#include #include -- cgit 
v1.2.3 From 7e1e2f27c5508518e58e5cbb11e26cbb815f4c56 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 19 Aug 2014 20:29:14 +0300 Subject: dmaengine: dw: convert dw_dma_slave to use explicit HS interfaces Instead of exposing the possibility to set the DMA registers CFG_HI and CFG_LO directly, restrict users to providing the handshake interfaces explicitly. Signed-off-by: Andy Shevchenko Acked-by: Hans-Christian Egtvedt Signed-off-by: Vinod Koul --- arch/avr32/mach-at32ap/at32ap700x.c | 15 +++++---------- drivers/dma/dw/core.c | 4 ++-- include/linux/platform_data/dma-dw.h | 10 ++++------ 3 files changed, 11 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/arch/avr32/mach-at32ap/at32ap700x.c b/arch/avr32/mach-at32ap/at32ap700x.c index ec7be287a97e..37b75602adf6 100644 --- a/arch/avr32/mach-at32ap/at32ap700x.c +++ b/arch/avr32/mach-at32ap/at32ap700x.c @@ -1356,10 +1356,8 @@ at32_add_device_mci(unsigned int id, struct mci_platform_data *data) goto fail; slave->sdata.dma_dev = &dw_dmac0_device.dev; - slave->sdata.cfg_hi = (DWC_CFGH_SRC_PER(0) - | DWC_CFGH_DST_PER(1)); - slave->sdata.cfg_lo &= ~(DWC_CFGL_HS_DST_POL - | DWC_CFGL_HS_SRC_POL); + slave->sdata.src_id = 0; + slave->sdata.dst_id = 1; slave->sdata.src_master = 1; slave->sdata.dst_master = 0; @@ -2054,8 +2052,7 @@ at32_add_device_ac97c(unsigned int id, struct ac97c_platform_data *data, /* Check if DMA slave interface for capture should be configured. */ if (flags & AC97C_CAPTURE) { rx_dws->dma_dev = &dw_dmac0_device.dev; - rx_dws->cfg_hi = DWC_CFGH_SRC_PER(3); - rx_dws->cfg_lo &= ~(DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL); + rx_dws->src_id = 3; rx_dws->src_master = 0; rx_dws->dst_master = 1; } @@ -2063,8 +2060,7 @@ at32_add_device_ac97c(unsigned int id, struct ac97c_platform_data *data, /* Check if DMA slave interface for playback should be configured. */ if (flags & AC97C_PLAYBACK) { tx_dws->dma_dev = &dw_dmac0_device.dev; - tx_dws->cfg_hi = DWC_CFGH_DST_PER(4); - tx_dws->cfg_lo &= ~(DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL); + tx_dws->dst_id = 4; tx_dws->src_master = 0; tx_dws->dst_master = 1; } @@ -2136,8 +2132,7 @@ at32_add_device_abdac(unsigned int id, struct atmel_abdac_pdata *data) dws = &data->dws; dws->dma_dev = &dw_dmac0_device.dev; - dws->cfg_hi = DWC_CFGH_DST_PER(2); - dws->cfg_lo &= ~(DWC_CFGL_HS_DST_POL | DWC_CFGL_HS_SRC_POL); + dws->dst_id = 2; dws->src_master = 0; dws->dst_master = 1; diff --git a/drivers/dma/dw/core.c b/drivers/dma/dw/core.c index 1af731b83b3f..0a9c052d437c 100644 --- a/drivers/dma/dw/core.c +++ b/drivers/dma/dw/core.c @@ -155,8 +155,8 @@ static void dwc_initialize(struct dw_dma_chan *dwc) */ BUG_ON(!dws->dma_dev || dws->dma_dev != dw->dma.dev); - cfghi = dws->cfg_hi; - cfglo |= dws->cfg_lo & ~DWC_CFGL_CH_PRIOR_MASK; + cfghi |= DWC_CFGH_DST_PER(dws->dst_id); + cfghi |= DWC_CFGH_SRC_PER(dws->src_id); } else { if (dwc->direction == DMA_MEM_TO_DEV) cfghi = DWC_CFGH_DST_PER(dwc->request_line); diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h index 68b4024184de..bc411a1bf8e7 100644 --- a/include/linux/platform_data/dma-dw.h +++ b/include/linux/platform_data/dma-dw.h @@ -17,17 +17,15 @@ * struct dw_dma_slave - Controller-specific information about a slave * * @dma_dev: required DMA master device. Depricated. - * @bus_id: name of this device channel, not just a device name since - * devices may have more than one channel e.g.
"foo_tx" - * @cfg_hi: Platform-specific initializer for the CFG_HI register - * @cfg_lo: Platform-specific initializer for the CFG_LO register + * @src_id: src request line + * @dst_id: dst request line * @src_master: src master for transfers on allocated channel. * @dst_master: dest master for transfers on allocated channel. */ struct dw_dma_slave { struct device *dma_dev; - u32 cfg_hi; - u32 cfg_lo; + u8 src_id; + u8 dst_id; u8 src_master; u8 dst_master; }; -- cgit v1.2.3 From 960d01acf62747d6518694f92be5b06f67473833 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 9 Sep 2014 22:55:35 +0300 Subject: cfg80211: add WMM traffic stream API Add nl80211 and driver API to validate, add and delete traffic streams with appropriate settings. The API calls for userspace doing the action frame handshake with the peer, and then allows only to set up the parameters in the driver. To avoid setting up a session only to tear it down again, the validate API is provided, but the real usage later can still fail so userspace must be prepared for that. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 4 ++ include/net/cfg80211.h | 23 ++++++++- include/uapi/linux/nl80211.h | 28 +++++++++++ net/wireless/nl80211.c | 109 +++++++++++++++++++++++++++++++++++++++++++ net/wireless/rdev-ops.h | 31 ++++++++++++ net/wireless/trace.h | 45 ++++++++++++++++++ 6 files changed, 239 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 61a09f0644aa..b1be39c76931 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -166,8 +166,12 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2) #define IEEE80211_MAX_MESH_ID_LEN 32 +#define IEEE80211_FIRST_TSPEC_TSID 8 #define IEEE80211_NUM_TIDS 16 +/* number of user priorities 802.11 uses */ +#define IEEE80211_NUM_UPS 8 + #define IEEE80211_QOS_CTL_LEN 2 /* 1d tag mask */ #define IEEE80211_QOS_CTL_TAG1D_MASK 0x0007 diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index c2c710c14a50..a13beab123dc 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2318,6 +2318,17 @@ struct cfg80211_qos_map { * @set_ap_chanwidth: Set the AP (including P2P GO) mode channel width for the * given interface This is used e.g. for dynamic HT 20/40 MHz channel width * changes during the lifetime of the BSS. + * + * @add_tx_ts: validate (if admitted_time is 0) or add a TX TS to the device + * with the given parameters; action frame exchange has been handled by + * userspace so this just has to modify the TX path to take the TS into + * account. + * If the admitted time is 0 just validate the parameters to make sure + * the session can be created at all; it is valid to just always return + * success for that but that may result in inefficient behaviour (handshake + * with the peer followed by immediate teardown when the addition is later + * rejected) + * @del_tx_ts: remove an existing TX TS */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy, struct cfg80211_wowlan *wow); @@ -2558,6 +2569,12 @@ struct cfg80211_ops { int (*set_ap_chanwidth)(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_chan_def *chandef); + + int (*add_tx_ts)(struct wiphy *wiphy, struct net_device *dev, + u8 tsid, const u8 *peer, u8 user_prio, + u16 admitted_time); + int (*del_tx_ts)(struct wiphy *wiphy, struct net_device *dev, + u8 tsid, const u8 *peer); }; /* @@ -2604,9 +2621,13 @@ struct cfg80211_ops { * @WIPHY_FLAG_SUPPORTS_5_10_MHZ: Device supports 5 MHz and 10 MHz channels. 
* @WIPHY_FLAG_HAS_CHANNEL_SWITCH: Device supports channel switch in * beaconing mode (AP, IBSS, Mesh, ...). + * @WIPHY_FLAG_SUPPORTS_WMM_ADMISSION: the device supports setting up WMM + * TSPEC sessions (TID aka TSID 0-7) with the NL80211_CMD_ADD_TX_TS + * command. Standard IEEE 802.11 TSPEC setup is not yet supported, it + * needs to be able to handle Block-Ack agreements and other things. */ enum wiphy_flags { - /* use hole at 0 */ + WIPHY_FLAG_SUPPORTS_WMM_ADMISSION = BIT(0), /* use hole at 1 */ /* use hole at 2 */ WIPHY_FLAG_NETNS_OK = BIT(3), diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 29c4399e08b9..e5b8caf5e466 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -722,6 +722,22 @@ * QoS mapping is relevant for IP packets, it is only valid during an * association. This is cleared on disassociation and AP restart. * + * @NL80211_CMD_ADD_TX_TS: Ask the kernel to add a traffic stream for the given + * %NL80211_ATTR_TSID and %NL80211_ATTR_MAC with %NL80211_ATTR_USER_PRIO + * and %NL80211_ATTR_ADMITTED_TIME parameters. + * Note that the action frame handshake with the AP shall be handled by + * userspace via the normal management RX/TX framework, this only sets + * up the TX TS in the driver/device. + * If the admitted time attribute is not added then the request just checks + * if a subsequent setup could be successful, the intent is to use this to + * avoid setting up a session with the AP when local restrictions would + * make that impossible. However, the subsequent "real" setup may still + * fail even if the check was successful. + * @NL80211_CMD_DEL_TX_TS: Remove an existing TS with the %NL80211_ATTR_TSID + * and %NL80211_ATTR_MAC parameters. It isn't necessary to call this + * before removing a station entry entirely, or before disassociating + * or similar, cleanup will happen in the driver/device in this case. + * * @NL80211_CMD_MAX: highest used command number * @__NL80211_CMD_AFTER_LAST: internal use */ @@ -893,6 +909,9 @@ enum nl80211_commands { NL80211_CMD_SET_QOS_MAP, + NL80211_CMD_ADD_TX_TS, + NL80211_CMD_DEL_TX_TS, + /* add new commands above here */ /* used to define NL80211_CMD_MAX below */ @@ -1611,6 +1630,11 @@ enum nl80211_commands { * drivers to indicate dynack capability. Dynack is automatically disabled * setting valid value for coverage class. 
* + * @NL80211_ATTR_TSID: a TSID value (u8 attribute) + * @NL80211_ATTR_USER_PRIO: user priority value (u8 attribute) + * @NL80211_ATTR_ADMITTED_TIME: admitted time in units of 32 microseconds + * (per second) (u16 attribute) + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -1957,6 +1981,10 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_DYN_ACK, + NL80211_ATTR_TSID, + NL80211_ATTR_USER_PRIO, + NL80211_ATTR_ADMITTED_TIME, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index e9fbd4f4ddb0..ab7ee4893e40 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -391,6 +391,9 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_IFACE_SOCKET_OWNER] = { .type = NLA_FLAG }, [NL80211_ATTR_CSA_C_OFFSETS_TX] = { .type = NLA_BINARY }, [NL80211_ATTR_USE_RRM] = { .type = NLA_FLAG }, + [NL80211_ATTR_TSID] = { .type = NLA_U8 }, + [NL80211_ATTR_USER_PRIO] = { .type = NLA_U8 }, + [NL80211_ATTR_ADMITTED_TIME] = { .type = NLA_U16 }, }; /* policy for the key attributes */ @@ -1510,6 +1513,9 @@ static int nl80211_send_wiphy(struct cfg80211_registered_device *rdev, if (rdev->wiphy.flags & WIPHY_FLAG_HAS_CHANNEL_SWITCH) CMD(channel_switch, CHANNEL_SWITCH); CMD(set_qos_map, SET_QOS_MAP); + if (rdev->wiphy.flags & + WIPHY_FLAG_SUPPORTS_WMM_ADMISSION) + CMD(add_tx_ts, ADD_TX_TS); } /* add into the if now */ #undef CMD @@ -9390,6 +9396,93 @@ static int nl80211_set_qos_map(struct sk_buff *skb, return ret; } +static int nl80211_add_tx_ts(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + const u8 *peer; + u8 tsid, up; + u16 admitted_time = 0; + int err; + + if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_WMM_ADMISSION)) + return -EOPNOTSUPP; + + if (!info->attrs[NL80211_ATTR_TSID] || !info->attrs[NL80211_ATTR_MAC] || + !info->attrs[NL80211_ATTR_USER_PRIO]) + return -EINVAL; + + tsid = nla_get_u8(info->attrs[NL80211_ATTR_TSID]); + if (tsid >= IEEE80211_NUM_TIDS) + return -EINVAL; + + up = nla_get_u8(info->attrs[NL80211_ATTR_USER_PRIO]); + if (up >= IEEE80211_NUM_UPS) + return -EINVAL; + + /* WMM uses TIDs 0-7 even for TSPEC */ + if (tsid < IEEE80211_FIRST_TSPEC_TSID) { + if (!(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_WMM_ADMISSION)) + return -EINVAL; + } else { + /* TODO: handle 802.11 TSPEC/admission control + * need more attributes for that (e.g. 
BA session requirement) + */ + return -EINVAL; + } + + peer = nla_data(info->attrs[NL80211_ATTR_MAC]); + + if (info->attrs[NL80211_ATTR_ADMITTED_TIME]) { + admitted_time = + nla_get_u16(info->attrs[NL80211_ATTR_ADMITTED_TIME]); + if (!admitted_time) + return -EINVAL; + } + + wdev_lock(wdev); + switch (wdev->iftype) { + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_P2P_CLIENT: + if (wdev->current_bss) + break; + err = -ENOTCONN; + goto out; + default: + err = -EOPNOTSUPP; + goto out; + } + + err = rdev_add_tx_ts(rdev, dev, tsid, peer, up, admitted_time); + + out: + wdev_unlock(wdev); + return err; +} + +static int nl80211_del_tx_ts(struct sk_buff *skb, struct genl_info *info) +{ + struct cfg80211_registered_device *rdev = info->user_ptr[0]; + struct net_device *dev = info->user_ptr[1]; + struct wireless_dev *wdev = dev->ieee80211_ptr; + const u8 *peer; + u8 tsid; + int err; + + if (!info->attrs[NL80211_ATTR_TSID] || !info->attrs[NL80211_ATTR_MAC]) + return -EINVAL; + + tsid = nla_get_u8(info->attrs[NL80211_ATTR_TSID]); + peer = nla_data(info->attrs[NL80211_ATTR_MAC]); + + wdev_lock(wdev); + err = rdev_del_tx_ts(rdev, dev, tsid, peer); + wdev_unlock(wdev); + + return err; +} + #define NL80211_FLAG_NEED_WIPHY 0x01 #define NL80211_FLAG_NEED_NETDEV 0x02 #define NL80211_FLAG_NEED_RTNL 0x04 @@ -10147,6 +10240,22 @@ static const struct genl_ops nl80211_ops[] = { .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | NL80211_FLAG_NEED_RTNL, }, + { + .cmd = NL80211_CMD_ADD_TX_TS, + .doit = nl80211_add_tx_ts, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, + { + .cmd = NL80211_CMD_DEL_TX_TS, + .doit = nl80211_del_tx_ts, + .policy = nl80211_policy, + .flags = GENL_ADMIN_PERM, + .internal_flags = NL80211_FLAG_NEED_NETDEV_UP | + NL80211_FLAG_NEED_RTNL, + }, }; /* notification functions */ diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h index 56c2240c30ce..f6d457d6a558 100644 --- a/net/wireless/rdev-ops.h +++ b/net/wireless/rdev-ops.h @@ -915,4 +915,35 @@ rdev_set_ap_chanwidth(struct cfg80211_registered_device *rdev, return ret; } +static inline int +rdev_add_tx_ts(struct cfg80211_registered_device *rdev, + struct net_device *dev, u8 tsid, const u8 *peer, + u8 user_prio, u16 admitted_time) +{ + int ret = -EOPNOTSUPP; + + trace_rdev_add_tx_ts(&rdev->wiphy, dev, tsid, peer, + user_prio, admitted_time); + if (rdev->ops->add_tx_ts) + ret = rdev->ops->add_tx_ts(&rdev->wiphy, dev, tsid, peer, + user_prio, admitted_time); + trace_rdev_return_int(&rdev->wiphy, ret); + + return ret; +} + +static inline int +rdev_del_tx_ts(struct cfg80211_registered_device *rdev, + struct net_device *dev, u8 tsid, const u8 *peer) +{ + int ret = -EOPNOTSUPP; + + trace_rdev_del_tx_ts(&rdev->wiphy, dev, tsid, peer); + if (rdev->ops->del_tx_ts) + ret = rdev->ops->del_tx_ts(&rdev->wiphy, dev, tsid, peer); + trace_rdev_return_int(&rdev->wiphy, ret); + + return ret; +} + #endif /* __CFG80211_RDEV_OPS */ diff --git a/net/wireless/trace.h b/net/wireless/trace.h index 0c524cd76c83..625a6e6d1168 100644 --- a/net/wireless/trace.h +++ b/net/wireless/trace.h @@ -1896,6 +1896,51 @@ TRACE_EVENT(rdev_set_ap_chanwidth, WIPHY_PR_ARG, NETDEV_PR_ARG, CHAN_DEF_PR_ARG) ); +TRACE_EVENT(rdev_add_tx_ts, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + u8 tsid, const u8 *peer, u8 user_prio, u16 admitted_time), + TP_ARGS(wiphy, netdev, tsid, peer, user_prio, admitted_time), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + MAC_ENTRY(peer) + 
__field(u8, tsid) + __field(u8, user_prio) + __field(u16, admitted_time) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + MAC_ASSIGN(peer, peer); + __entry->tsid = tsid; + __entry->user_prio = user_prio; + __entry->admitted_time = admitted_time; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT ", TSID %d, UP %d, time %d", + WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer), + __entry->tsid, __entry->user_prio, __entry->admitted_time) +); + +TRACE_EVENT(rdev_del_tx_ts, + TP_PROTO(struct wiphy *wiphy, struct net_device *netdev, + u8 tsid, const u8 *peer), + TP_ARGS(wiphy, netdev, tsid, peer), + TP_STRUCT__entry( + WIPHY_ENTRY + NETDEV_ENTRY + MAC_ENTRY(peer) + __field(u8, tsid) + ), + TP_fast_assign( + WIPHY_ASSIGN; + NETDEV_ASSIGN; + MAC_ASSIGN(peer, peer); + __entry->tsid = tsid; + ), + TP_printk(WIPHY_PR_FMT ", " NETDEV_PR_FMT ", " MAC_PR_FMT ", TSID %d", + WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer), __entry->tsid) +); + /************************************************************* * cfg80211 exported functions traces * *************************************************************/ -- cgit v1.2.3 From 047133066e6c2549403fe5a2d619f47ba4212ef5 Mon Sep 17 00:00:00 2001 From: Jacek Anaszewski Date: Thu, 7 Aug 2014 05:10:22 -0700 Subject: leds: Reorder include directives Reorder include directives so that they are arranged in alphabetical order. Signed-off-by: Jacek Anaszewski Acked-by: Kyungmin Park Cc: Richard Purdie Signed-off-by: Bryan Wu --- drivers/leds/led-class.c | 13 +++++++------ drivers/leds/led-core.c | 3 ++- include/linux/leds.h | 2 +- 3 files changed, 10 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index aa29198fca3e..4bd4572efe6e 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -9,16 +9,17 @@ * published by the Free Software Foundation. */ -#include -#include +#include +#include +#include #include +#include +#include #include +#include +#include #include -#include #include -#include -#include -#include #include "leds.h" static struct class *leds_class; diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c index 71b40d3bf776..50b579ad948e 100644 --- a/drivers/leds/led-core.c +++ b/drivers/leds/led-core.c @@ -12,10 +12,11 @@ */ #include +#include #include #include +#include #include -#include #include "leds.h" DECLARE_RWSEM(leds_list_lock); diff --git a/include/linux/leds.h b/include/linux/leds.h index e43686472197..4be2d7623d9e 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -13,8 +13,8 @@ #define __LINUX_LEDS_H_INCLUDED #include -#include #include +#include #include #include -- cgit v1.2.3 From d8082827d8a214343b761f2c4554d2a7d1573d63 Mon Sep 17 00:00:00 2001 From: Jacek Anaszewski Date: Thu, 7 Aug 2014 05:10:23 -0700 Subject: leds: make brightness type consistent across whole subsystem Documentation states that the brightness unit type is enum led_brightness, and this is the type used by the LED API functions. Adjust the type of brightness variables in the struct led_classdev accordingly.
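With the fields typed this way, a driver-side setter lines up with the documented unit; a small sketch (my_led_priv and the register write are illustrative only):

	struct my_led_priv {
		struct led_classdev cdev;
		void __iomem *base;
	};

	static void my_led_brightness_set(struct led_classdev *cdev,
					  enum led_brightness value)
	{
		struct my_led_priv *priv =
			container_of(cdev, struct my_led_priv, cdev);

		/* value ranges from LED_OFF up to cdev->max_brightness */
		writel(value, priv->base);
	}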
Signed-off-by: Jacek Anaszewski Acked-by: Kyungmin Park Cc: Richard Purdie Signed-off-by: Bryan Wu --- include/linux/leds.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index 4be2d7623d9e..f2e1cbc25705 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -31,8 +31,8 @@ enum led_brightness { struct led_classdev { const char *name; - int brightness; - int max_brightness; + enum led_brightness brightness; + enum led_brightness max_brightness; int flags; /* Lower 16 bits reflect status */ -- cgit v1.2.3 From 974a70bdecea5296db1b643e4046ef208e99c592 Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Fri, 12 Sep 2014 09:32:41 +0800 Subject: usb: gadget: udc-core: add utility for bus reset The udc driver can notify the udc core that a bus reset has occurred by calling this utility; the core will pass this information on to the gadget driver and update the gadget state accordingly. Signed-off-by: Peter Chen Signed-off-by: Felipe Balbi --- drivers/usb/gadget/udc/udc-core.c | 17 +++++++++++++++ include/linux/usb/gadget.h | 6 ++++++ 2 files changed, 23 insertions(+) (limited to 'include/linux') diff --git a/drivers/usb/gadget/udc/udc-core.c b/drivers/usb/gadget/udc/udc-core.c index b0d98172bc07..ba661c6da54a 100644 --- a/drivers/usb/gadget/udc/udc-core.c +++ b/drivers/usb/gadget/udc/udc-core.c @@ -123,6 +123,23 @@ EXPORT_SYMBOL_GPL(usb_gadget_set_state); /* ------------------------------------------------------------------------- */ +/** + * usb_gadget_udc_reset - notifies the udc core that bus reset occurs + * @gadget: The gadget which bus reset occurs + * @driver: The gadget driver we want to notify + * + * If the udc driver has bus reset handler, it needs to call this when the bus + * reset occurs, it notifies the gadget driver that the bus reset occurs as + * well as updates gadget state. + */ +void usb_gadget_udc_reset(struct usb_gadget *gadget, + struct usb_gadget_driver *driver) +{ + driver->reset(gadget); + usb_gadget_set_state(gadget, USB_STATE_DEFAULT); +} +EXPORT_SYMBOL_GPL(usb_gadget_udc_reset); + /** * usb_gadget_udc_start - tells usb device controller to start up * @gadget: The gadget we want to get started diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 598a6e9b2850..d18811433324 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -1017,6 +1017,12 @@ extern void usb_gadget_set_state(struct usb_gadget *gadget, /*-------------------------------------------------------------------------*/ +/* utility to tell udc core that the bus reset occurs */ +extern void usb_gadget_udc_reset(struct usb_gadget *gadget, + struct usb_gadget_driver *driver); + +/*-------------------------------------------------------------------------*/ + /* utility wrapping a simple endpoint selection policy */ extern struct usb_ep *usb_ep_autoconfig(struct usb_gadget *, -- cgit v1.2.3 From d4b18c3e00b8d18fbd316abe9639b91ad416e1f3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 Sep 2014 17:36:31 -0700 Subject: pnfs: remove GETDEVICELIST implementation The current GETDEVICELIST implementation is buggy in that it doesn't handle cursors correctly, and in that it returns an error if the server returns NFSERR_NOTSUPP. Given that there is no actual need for GETDEVICELIST, that it has various issues, and that it might get removed for NFSv4.2, stop using it in the blocklayout driver, and thus in the Linux NFS client as a whole.
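Device IDs are instead resolved one at a time, on first reference from a layout, via the GETDEVICEINFO path that survives below; roughly (a hand-written sketch — the helper call, its signature, and the omitted response-page setup are assumptions, not taken from this patch):

	/* Fetch a single deviceid the first time a layout names it. */
	struct pnfs_device pdev;

	memset(&pdev, 0, sizeof(pdev));
	pdev.layout_type = server->pnfs_curr_ld->id;
	memcpy(&pdev.dev_id, dev_id, sizeof(pdev.dev_id));
	/* setup of pdev.pages/pglen for the XDR reply is elided */
	err = nfs4_proc_getdeviceinfo(server, &pdev, cred);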
Signed-off-by: Christoph Hellwig Signed-off-by: Trond Myklebust --- fs/nfs/blocklayout/blocklayout.c | 2 +- fs/nfs/nfs4proc.c | 48 --------------- fs/nfs/nfs4xdr.c | 130 --------------------------------------- fs/nfs/pnfs.h | 5 -- fs/nfs/pnfs_dev.c | 30 --------- include/linux/nfs_xdr.h | 11 ---- 6 files changed, 1 insertion(+), 225 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c index 25ba9e0e6fff..3e1f1afc6db4 100644 --- a/fs/nfs/blocklayout/blocklayout.c +++ b/fs/nfs/blocklayout/blocklayout.c @@ -532,7 +532,7 @@ bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh) return -EINVAL; } - return nfs4_deviceid_getdevicelist(server, fh); + return 0; } static bool diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 47fa67a3a8cf..0b711227cfa5 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -7808,54 +7808,6 @@ int nfs4_proc_layoutreturn(struct nfs4_layoutreturn *lrp) return status; } -/* - * Retrieve the list of Data Server devices from the MDS. - */ -static int _nfs4_getdevicelist(struct nfs_server *server, - const struct nfs_fh *fh, - struct pnfs_devicelist *devlist) -{ - struct nfs4_getdevicelist_args args = { - .fh = fh, - .layoutclass = server->pnfs_curr_ld->id, - }; - struct nfs4_getdevicelist_res res = { - .devlist = devlist, - }; - struct rpc_message msg = { - .rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_GETDEVICELIST], - .rpc_argp = &args, - .rpc_resp = &res, - }; - int status; - - dprintk("--> %s\n", __func__); - status = nfs4_call_sync(server->client, server, &msg, &args.seq_args, - &res.seq_res, 0); - dprintk("<-- %s status=%d\n", __func__, status); - return status; -} - -int nfs4_proc_getdevicelist(struct nfs_server *server, - const struct nfs_fh *fh, - struct pnfs_devicelist *devlist) -{ - struct nfs4_exception exception = { }; - int err; - - do { - err = nfs4_handle_exception(server, - _nfs4_getdevicelist(server, fh, devlist), - &exception); - } while (exception.retry); - - dprintk("%s: err=%d, num_devs=%u\n", __func__, - err, devlist->num_devs); - - return err; -} -EXPORT_SYMBOL_GPL(nfs4_proc_getdevicelist); - static int _nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *pdev, diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c index f2cd957adb90..b8165eab0a32 100644 --- a/fs/nfs/nfs4xdr.c +++ b/fs/nfs/nfs4xdr.c @@ -362,17 +362,6 @@ static int nfs4_stat_to_errno(int); XDR_QUADLEN(NFS4_MAX_SESSIONID_LEN) + 5) #define encode_reclaim_complete_maxsz (op_encode_hdr_maxsz + 4) #define decode_reclaim_complete_maxsz (op_decode_hdr_maxsz + 4) -#define encode_getdevicelist_maxsz (op_encode_hdr_maxsz + 4 + \ - encode_verifier_maxsz) -#define decode_getdevicelist_maxsz (op_decode_hdr_maxsz + \ - 2 /* nfs_cookie4 gdlr_cookie */ + \ - decode_verifier_maxsz \ - /* verifier4 gdlr_verifier */ + \ - 1 /* gdlr_deviceid_list count */ + \ - XDR_QUADLEN(NFS4_PNFS_GETDEVLIST_MAXNUM * \ - NFS4_DEVICEID4_SIZE) \ - /* gdlr_deviceid_list */ + \ - 1 /* bool gdlr_eof */) #define encode_getdeviceinfo_maxsz (op_encode_hdr_maxsz + 4 + \ XDR_QUADLEN(NFS4_DEVICEID4_SIZE)) #define decode_getdeviceinfo_maxsz (op_decode_hdr_maxsz + \ @@ -812,14 +801,6 @@ static int nfs4_stat_to_errno(int); #define NFS4_dec_reclaim_complete_sz (compound_decode_hdr_maxsz + \ decode_sequence_maxsz + \ decode_reclaim_complete_maxsz) -#define NFS4_enc_getdevicelist_sz (compound_encode_hdr_maxsz + \ - encode_sequence_maxsz + \ - encode_putfh_maxsz + \ - encode_getdevicelist_maxsz) -#define NFS4_dec_getdevicelist_sz 
(compound_decode_hdr_maxsz + \ - decode_sequence_maxsz + \ - decode_putfh_maxsz + \ - decode_getdevicelist_maxsz) #define NFS4_enc_getdeviceinfo_sz (compound_encode_hdr_maxsz + \ encode_sequence_maxsz +\ encode_getdeviceinfo_maxsz) @@ -1929,24 +1910,6 @@ static void encode_sequence(struct xdr_stream *xdr, } #ifdef CONFIG_NFS_V4_1 -static void -encode_getdevicelist(struct xdr_stream *xdr, - const struct nfs4_getdevicelist_args *args, - struct compound_hdr *hdr) -{ - __be32 *p; - nfs4_verifier dummy = { - .data = "dummmmmy", - }; - - encode_op_hdr(xdr, OP_GETDEVICELIST, decode_getdevicelist_maxsz, hdr); - p = reserve_space(xdr, 16); - *p++ = cpu_to_be32(args->layoutclass); - *p++ = cpu_to_be32(NFS4_PNFS_GETDEVLIST_MAXNUM); - xdr_encode_hyper(p, 0ULL); /* cookie */ - encode_nfs4_verifier(xdr, &dummy); -} - static void encode_getdeviceinfo(struct xdr_stream *xdr, const struct nfs4_getdeviceinfo_args *args, @@ -2900,24 +2863,6 @@ static void nfs4_xdr_enc_reclaim_complete(struct rpc_rqst *req, encode_nops(&hdr); } -/* - * Encode GETDEVICELIST request - */ -static void nfs4_xdr_enc_getdevicelist(struct rpc_rqst *req, - struct xdr_stream *xdr, - struct nfs4_getdevicelist_args *args) -{ - struct compound_hdr hdr = { - .minorversion = nfs4_xdr_minorversion(&args->seq_args), - }; - - encode_compound_hdr(xdr, req, &hdr); - encode_sequence(xdr, &args->seq_args, &hdr); - encode_putfh(xdr, args->fh, &hdr); - encode_getdevicelist(xdr, args, &hdr); - encode_nops(&hdr); -} - /* * Encode GETDEVICEINFO request */ @@ -5773,54 +5718,6 @@ out_overflow: } #if defined(CONFIG_NFS_V4_1) -/* - * TODO: Need to handle case when EOF != true; - */ -static int decode_getdevicelist(struct xdr_stream *xdr, - struct pnfs_devicelist *res) -{ - __be32 *p; - int status, i; - nfs4_verifier verftemp; - - status = decode_op_hdr(xdr, OP_GETDEVICELIST); - if (status) - return status; - - p = xdr_inline_decode(xdr, 8 + 8 + 4); - if (unlikely(!p)) - goto out_overflow; - - /* TODO: Skip cookie for now */ - p += 2; - - /* Read verifier */ - p = xdr_decode_opaque_fixed(p, verftemp.data, NFS4_VERIFIER_SIZE); - - res->num_devs = be32_to_cpup(p); - - dprintk("%s: num_dev %d\n", __func__, res->num_devs); - - if (res->num_devs > NFS4_PNFS_GETDEVLIST_MAXNUM) { - printk(KERN_ERR "NFS: %s too many result dev_num %u\n", - __func__, res->num_devs); - return -EIO; - } - - p = xdr_inline_decode(xdr, - res->num_devs * NFS4_DEVICEID4_SIZE + 4); - if (unlikely(!p)) - goto out_overflow; - for (i = 0; i < res->num_devs; i++) - p = xdr_decode_opaque_fixed(p, res->dev_id[i].data, - NFS4_DEVICEID4_SIZE); - res->eof = be32_to_cpup(p); - return 0; -out_overflow: - print_overflow_msg(__func__, xdr); - return -EIO; -} - static int decode_getdeviceinfo(struct xdr_stream *xdr, struct pnfs_device *pdev) { @@ -7104,32 +7001,6 @@ static int nfs4_xdr_dec_reclaim_complete(struct rpc_rqst *rqstp, return status; } -/* - * Decode GETDEVICELIST response - */ -static int nfs4_xdr_dec_getdevicelist(struct rpc_rqst *rqstp, - struct xdr_stream *xdr, - struct nfs4_getdevicelist_res *res) -{ - struct compound_hdr hdr; - int status; - - dprintk("encoding getdevicelist!\n"); - - status = decode_compound_hdr(xdr, &hdr); - if (status != 0) - goto out; - status = decode_sequence(xdr, &res->seq_res, rqstp); - if (status != 0) - goto out; - status = decode_putfh(xdr); - if (status != 0) - goto out; - status = decode_getdevicelist(xdr, res->devlist); -out: - return status; -} - /* * Decode GETDEVINFO response */ @@ -7498,7 +7369,6 @@ struct rpc_procinfo nfs4_procedures[] = { 
PROC(SECINFO_NO_NAME, enc_secinfo_no_name, dec_secinfo_no_name), PROC(TEST_STATEID, enc_test_stateid, dec_test_stateid), PROC(FREE_STATEID, enc_free_stateid, dec_free_stateid), - PROC(GETDEVICELIST, enc_getdevicelist, dec_getdevicelist), PROC(BIND_CONN_TO_SESSION, enc_bind_conn_to_session, dec_bind_conn_to_session), PROC(DESTROY_CLIENTID, enc_destroy_clientid, dec_destroy_clientid), diff --git a/fs/nfs/pnfs.h b/fs/nfs/pnfs.h index d84fe29fc88b..b10f12fc6818 100644 --- a/fs/nfs/pnfs.h +++ b/fs/nfs/pnfs.h @@ -180,9 +180,6 @@ extern int pnfs_register_layoutdriver(struct pnfs_layoutdriver_type *); extern void pnfs_unregister_layoutdriver(struct pnfs_layoutdriver_type *); /* nfs4proc.c */ -extern int nfs4_proc_getdevicelist(struct nfs_server *server, - const struct nfs_fh *fh, - struct pnfs_devicelist *devlist); extern int nfs4_proc_getdeviceinfo(struct nfs_server *server, struct pnfs_device *dev, struct rpc_cred *cred); @@ -277,8 +274,6 @@ bool nfs4_put_deviceid_node(struct nfs4_deviceid_node *); void nfs4_mark_deviceid_unavailable(struct nfs4_deviceid_node *node); bool nfs4_test_deviceid_unavailable(struct nfs4_deviceid_node *node); void nfs4_deviceid_purge_client(const struct nfs_client *); -int nfs4_deviceid_getdevicelist(struct nfs_server *server, - const struct nfs_fh *fh); static inline struct nfs4_deviceid_node * nfs4_get_deviceid(struct nfs4_deviceid_node *d) diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index 82c2836fdb38..aa2ec0015183 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -358,33 +358,3 @@ nfs4_deviceid_mark_client_invalid(struct nfs_client *clp) } rcu_read_unlock(); } - -int -nfs4_deviceid_getdevicelist(struct nfs_server *server, - const struct nfs_fh *fh) -{ - struct pnfs_devicelist *dlist; - struct nfs4_deviceid_node *d; - int error = 0, i; - - dlist = kzalloc(sizeof(struct pnfs_devicelist), GFP_NOFS); - if (!dlist) - return -ENOMEM; - - while (!dlist->eof) { - error = nfs4_proc_getdevicelist(server, fh, dlist); - if (error) - break; - - for (i = 0; i < dlist->num_devs; i++) { - d = nfs4_find_get_deviceid(server, &dlist->dev_id[i], - NULL, GFP_NOFS); - if (d) - nfs4_put_deviceid_node(d); - } - } - - kfree(dlist); - return error; -} -EXPORT_SYMBOL_GPL(nfs4_deviceid_getdevicelist); diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index f4092c6b90fb..7ae249ccb78d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -252,17 +252,6 @@ struct nfs4_layoutget { gfp_t gfp_flags; }; -struct nfs4_getdevicelist_args { - struct nfs4_sequence_args seq_args; - const struct nfs_fh *fh; - u32 layoutclass; -}; - -struct nfs4_getdevicelist_res { - struct nfs4_sequence_res seq_res; - struct pnfs_devicelist *devlist; -}; - struct nfs4_getdeviceinfo_args { struct nfs4_sequence_args seq_args; struct pnfs_device *pdev; -- cgit v1.2.3 From f418c64b71590bac8fdebd0969a1eeaffaf036d2 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 3 Sep 2014 12:19:07 -0400 Subject: NFS: Unconditionally enable commit code The goal is to create a generic NFS module with code that does not depend on what versions of NFS are enabled. 
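For context, the idiom this series removes looks roughly like the following sketch (hypothetical function name, not taken from the patch): every commit-path helper needed a real definition for v3/v4 builds plus a do-nothing stub for v2-only builds:

#include <linux/fs.h>

#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4)
int nfs_example_commit(struct inode *inode)
{
	/* real commit handling, previously built only for v3/v4 configs */
	return 0;
}
#else
static inline int nfs_example_commit(struct inode *inode)
{
	return 0;	/* no-op stub for v2-only builds */
}
#endif

Dropping the #if/#else and keeping only the first definition unconditionally is the transformation applied to direct.c, write.c and nfs_fs.h below.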
Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/direct.c | 14 ------------ fs/nfs/write.c | 61 -------------------------------------------------- include/linux/nfs_fs.h | 8 ------- 3 files changed, 83 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 65ef6e00deee..dda4b8667c02 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -178,7 +178,6 @@ static int nfs_direct_set_or_cmp_hdr_verf(struct nfs_direct_req *dreq, return memcmp(verfp, &hdr->verf, sizeof(struct nfs_writeverf)); } -#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) /* * nfs_direct_cmp_commit_data_verf - compare verifier for commit data * @dreq - direct request possibly spanning multiple servers @@ -197,7 +196,6 @@ static int nfs_direct_cmp_commit_data_verf(struct nfs_direct_req *dreq, WARN_ON_ONCE(verfp->committed < 0); return memcmp(verfp, &data->verf, sizeof(struct nfs_writeverf)); } -#endif /** * nfs_direct_IO - NFS address space operation for direct I/O @@ -576,7 +574,6 @@ out: return result; } -#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) static void nfs_direct_write_reschedule(struct nfs_direct_req *dreq) { struct nfs_pageio_descriptor desc; @@ -700,17 +697,6 @@ static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode schedule_work(&dreq->work); /* Calls nfs_direct_write_schedule_work */ } -#else -static void nfs_direct_write_schedule_work(struct work_struct *work) -{ -} - -static void nfs_direct_write_complete(struct nfs_direct_req *dreq, struct inode *inode) -{ - nfs_direct_complete(dreq, true); -} -#endif - static void nfs_direct_write_completion(struct nfs_pgio_header *hdr) { struct nfs_direct_req *dreq = hdr->dreq; diff --git a/fs/nfs/write.c b/fs/nfs/write.c index 128d97f01d1c..6786873a2901 100644 --- a/fs/nfs/write.c +++ b/fs/nfs/write.c @@ -720,7 +720,6 @@ nfs_mark_request_dirty(struct nfs_page *req) __set_page_dirty_nobuffers(req->wb_page); } -#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) /* * nfs_page_search_commits_for_head_request_locked * @@ -870,43 +869,6 @@ int nfs_write_need_commit(struct nfs_pgio_header *hdr) return hdr->verf.committed != NFS_FILE_SYNC; } -#else -static struct nfs_page * -nfs_page_search_commits_for_head_request_locked(struct nfs_inode *nfsi, - struct page *page) -{ - return NULL; -} - -static void nfs_init_cinfo_from_inode(struct nfs_commit_info *cinfo, - struct inode *inode) -{ -} - -void nfs_init_cinfo(struct nfs_commit_info *cinfo, - struct inode *inode, - struct nfs_direct_req *dreq) -{ -} - -void -nfs_mark_request_commit(struct nfs_page *req, struct pnfs_layout_segment *lseg, - struct nfs_commit_info *cinfo) -{ -} - -static void -nfs_clear_request_commit(struct nfs_page *req) -{ -} - -int nfs_write_need_commit(struct nfs_pgio_header *hdr) -{ - return 0; -} - -#endif - static void nfs_write_completion(struct nfs_pgio_header *hdr) { struct nfs_commit_info cinfo; @@ -942,7 +904,6 @@ out: hdr->release(hdr); } -#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) { @@ -999,19 +960,6 @@ nfs_scan_commit(struct inode *inode, struct list_head *dst, return ret; } -#else -unsigned long nfs_reqs_to_commit(struct nfs_commit_info *cinfo) -{ - return 0; -} - -int nfs_scan_commit(struct inode *inode, struct list_head *dst, - struct nfs_commit_info *cinfo) -{ - return 0; -} -#endif - /* * Search for an existing write request, and attempt to update * it to reflect a new dirty region on a given page. 
@@ -1404,7 +1352,6 @@ static int nfs_writeback_done(struct rpc_task *task, return status; nfs_add_stats(inode, NFSIOS_SERVERWRITTENBYTES, hdr->res.count); -#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) if (hdr->res.verf->committed < hdr->args.stable && task->tk_status >= 0) { /* We tried a write call, but the server did not @@ -1426,7 +1373,6 @@ static int nfs_writeback_done(struct rpc_task *task, complain = jiffies + 300 * HZ; } } -#endif /* Deal with the suid/sgid bit corner case */ if (nfs_should_remove_suid(inode)) @@ -1479,7 +1425,6 @@ static void nfs_writeback_result(struct rpc_task *task, } -#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) static int nfs_commit_set_lock(struct nfs_inode *nfsi, int may_wait) { int ret; @@ -1803,12 +1748,6 @@ out_mark_dirty: __mark_inode_dirty(inode, I_DIRTY_DATASYNC); return ret; } -#else -static int nfs_commit_unstable_pages(struct inode *inode, struct writeback_control *wbc) -{ - return 0; -} -#endif int nfs_write_inode(struct inode *inode, struct writeback_control *wbc) { diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 5180a7ededec..fd334d4b0d41 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -529,17 +529,9 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned extern int nfs_wb_all(struct inode *inode); extern int nfs_wb_page(struct inode *inode, struct page* page); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); -#if IS_ENABLED(CONFIG_NFS_V3) || IS_ENABLED(CONFIG_NFS_V4) extern int nfs_commit_inode(struct inode *, int); extern struct nfs_commit_data *nfs_commitdata_alloc(void); extern void nfs_commit_free(struct nfs_commit_data *data); -#else -static inline int -nfs_commit_inode(struct inode *inode, int how) -{ - return 0; -} -#endif static inline int nfs_have_writebacks(struct inode *inode) -- cgit v1.2.3 From cb8c20fa53ec28602793ee43ddc7e8883be62e69 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 3 Sep 2014 12:19:10 -0400 Subject: NFS: Move NFS v3 acl functions to nfs3_fs.h This code is internal to the v3 module, so other parts of the client shouldn't have any knowledge of it. nfs3_getxattr(), nfs3_setxattr(), and nfs3_removexattr() no longer exist anywhere so I remove the declarations while I'm here. 
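The underlying pattern — a version-private header that exposes the real declarations when the option is enabled and static inline stubs otherwise — can be sketched as follows (hypothetical names; the real declarations are in the nfs3_fs.h hunk below):

#include <linux/fs.h>
#include <linux/posix_acl.h>

/* sketch of an fs/nfs/nfs3_fs.h-style private header */
#ifdef CONFIG_NFS_V3_ACL
extern int nfs3_example_setacls(struct inode *inode, struct posix_acl *acl);
#else
static inline int nfs3_example_setacls(struct inode *inode,
				       struct posix_acl *acl)
{
	return 0;	/* ACL support compiled out: succeed silently */
}
#endif

Files internal to the v3 module then #include "nfs3_fs.h" (as nfs3super.c starts doing in this patch) instead of picking the declarations up from the public linux/nfs_fs.h.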
Signed-off-by: Anna Schumaker Signed-off-by: Trond Myklebust --- fs/nfs/nfs3_fs.h | 19 +++++++++++++++++++ fs/nfs/nfs3super.c | 1 + include/linux/nfs_fs.h | 33 --------------------------------- 3 files changed, 20 insertions(+), 33 deletions(-) (limited to 'include/linux') diff --git a/fs/nfs/nfs3_fs.h b/fs/nfs/nfs3_fs.h index 6599e0dcd69f..333ae4068506 100644 --- a/fs/nfs/nfs3_fs.h +++ b/fs/nfs/nfs3_fs.h @@ -6,6 +6,25 @@ #ifndef __LINUX_FS_NFS_NFS3_FS_H #define __LINUX_FS_NFS_NFS3_FS_H +/* + * nfs3acl.c + */ +#ifdef CONFIG_NFS_V3_ACL +extern struct posix_acl *nfs3_get_acl(struct inode *inode, int type); +extern int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type); +extern int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, + struct posix_acl *dfacl); +extern ssize_t nfs3_listxattr(struct dentry *, char *, size_t); +extern const struct xattr_handler *nfs3_xattr_handlers[]; +#else +static inline int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, + struct posix_acl *dfacl) +{ + return 0; +} +#define nfs3_listxattr NULL +#endif /* CONFIG_NFS_V3_ACL */ + /* nfs3client.c */ struct nfs_server *nfs3_create_server(struct nfs_mount_info *, struct nfs_subversion *); struct nfs_server *nfs3_clone_server(struct nfs_server *, struct nfs_fh *, diff --git a/fs/nfs/nfs3super.c b/fs/nfs/nfs3super.c index d6a98949af19..6af29c2da352 100644 --- a/fs/nfs/nfs3super.c +++ b/fs/nfs/nfs3super.c @@ -4,6 +4,7 @@ #include #include #include "internal.h" +#include "nfs3_fs.h" #include "nfs.h" static struct nfs_subversion nfs_v3 = { diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index fd334d4b0d41..28d649054d5f 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -442,22 +442,6 @@ static inline struct rpc_cred *nfs_file_cred(struct file *file) return NULL; } -/* - * linux/fs/nfs/xattr.c - */ -#ifdef CONFIG_NFS_V3_ACL -extern ssize_t nfs3_listxattr(struct dentry *, char *, size_t); -extern ssize_t nfs3_getxattr(struct dentry *, const char *, void *, size_t); -extern int nfs3_setxattr(struct dentry *, const char *, - const void *, size_t, int); -extern int nfs3_removexattr (struct dentry *, const char *name); -#else -# define nfs3_listxattr NULL -# define nfs3_getxattr NULL -# define nfs3_setxattr NULL -# define nfs3_removexattr NULL -#endif - /* * linux/fs/nfs/direct.c */ @@ -548,23 +532,6 @@ extern int nfs_readpages(struct file *, struct address_space *, extern int nfs_readpage_async(struct nfs_open_context *, struct inode *, struct page *); -/* - * linux/fs/nfs3proc.c - */ -#ifdef CONFIG_NFS_V3_ACL -extern struct posix_acl *nfs3_get_acl(struct inode *inode, int type); -extern int nfs3_set_acl(struct inode *inode, struct posix_acl *acl, int type); -extern int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, - struct posix_acl *dfacl); -extern const struct xattr_handler *nfs3_xattr_handlers[]; -#else -static inline int nfs3_proc_setacls(struct inode *inode, struct posix_acl *acl, - struct posix_acl *dfacl) -{ - return 0; -} -#endif /* CONFIG_NFS_V3_ACL */ - /* * inline functions */ -- cgit v1.2.3 From 3ef7de5304edf60d0b8674dd7cdacc104e15a93c Mon Sep 17 00:00:00 2001 From: Jacek Anaszewski Date: Wed, 20 Aug 2014 06:41:55 -0700 Subject: leds: Improve and export led_update_brightness led_update_brightness helper function used to be exploited only locally in the led-class.c module, where its result was being passed to the brightness_show sysfs callback. 
With the introduction of v4l2-flash subdevice the same functionality becomes required for reading current brightness from a LED device. This patch adds checking of return value of the brightness_get callback and moves the led_update_brightness() function to the LED subsystem public API. Signed-off-by: Jacek Anaszewski Acked-by: Kyungmin Park Cc: Richard Purdie Signed-off-by: Bryan Wu --- drivers/leds/led-class.c | 6 ------ drivers/leds/led-core.c | 16 ++++++++++++++++ include/linux/leds.h | 10 ++++++++++ 3 files changed, 26 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/drivers/leds/led-class.c b/drivers/leds/led-class.c index 71666a40b79a..7440c58b8e6f 100644 --- a/drivers/leds/led-class.c +++ b/drivers/leds/led-class.c @@ -24,12 +24,6 @@ static struct class *leds_class; -static void led_update_brightness(struct led_classdev *led_cdev) -{ - if (led_cdev->brightness_get) - led_cdev->brightness = led_cdev->brightness_get(led_cdev); -} - static ssize_t brightness_show(struct device *dev, struct device_attribute *attr, char *buf) { diff --git a/drivers/leds/led-core.c b/drivers/leds/led-core.c index 50b579ad948e..aaa8eba9099f 100644 --- a/drivers/leds/led-core.c +++ b/drivers/leds/led-core.c @@ -127,3 +127,19 @@ void led_set_brightness(struct led_classdev *led_cdev, __led_set_brightness(led_cdev, brightness); } EXPORT_SYMBOL(led_set_brightness); + +int led_update_brightness(struct led_classdev *led_cdev) +{ + int ret = 0; + + if (led_cdev->brightness_get) { + ret = led_cdev->brightness_get(led_cdev); + if (ret >= 0) { + led_cdev->brightness = ret; + return 0; + } + } + + return ret; +} +EXPORT_SYMBOL(led_update_brightness); diff --git a/include/linux/leds.h b/include/linux/leds.h index f2e1cbc25705..a57611d0c94e 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -140,6 +140,16 @@ extern void led_blink_set_oneshot(struct led_classdev *led_cdev, */ extern void led_set_brightness(struct led_classdev *led_cdev, enum led_brightness brightness); +/** + * led_update_brightness - update LED brightness + * @led_cdev: the LED to query + * + * Get an LED's current brightness and update led_cdev->brightness + * member with the obtained value. + * + * Returns: 0 on success or negative error value on failure + */ +extern int led_update_brightness(struct led_classdev *led_cdev); /* * LED Triggers -- cgit v1.2.3 From fbfa398b84a5fc6e085dedba5ec3e94f21815d05 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 28 Aug 2014 12:21:44 -0600 Subject: PCI: Remove unused pci_configure_slot() All pci_configure_slot() uses have been removed, so remove the definition as well. 
Signed-off-by: Bjorn Helgaas Acked-by: Yinghai Lu --- drivers/pci/probe.c | 28 ---------------------------- include/linux/pci_hotplug.h | 2 -- 2 files changed, 30 deletions(-) (limited to 'include/linux') diff --git a/drivers/pci/probe.c b/drivers/pci/probe.c index e9482867555b..4b3b29bc7a82 100644 --- a/drivers/pci/probe.c +++ b/drivers/pci/probe.c @@ -1358,34 +1358,6 @@ static void program_hpp_type2(struct pci_dev *dev, struct hpp_type2 *hpp) */ } -void pci_configure_slot(struct pci_dev *dev) -{ - struct pci_dev *cdev; - struct hotplug_params hpp; - int ret; - - if (!(dev->hdr_type == PCI_HEADER_TYPE_NORMAL || - (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE && - (dev->class >> 8) == PCI_CLASS_BRIDGE_PCI))) - return; - - pcie_bus_configure_settings(dev->bus); - - memset(&hpp, 0, sizeof(hpp)); - ret = pci_get_hp_params(dev, &hpp); - - program_hpp_type2(dev, hpp.t2); - program_hpp_type1(dev, hpp.t1); - program_hpp_type0(dev, hpp.t0); - - if (dev->subordinate) { - list_for_each_entry(cdev, &dev->subordinate->devices, - bus_list) - pci_configure_slot(cdev); - } -} -EXPORT_SYMBOL_GPL(pci_configure_slot); - static void pci_configure_device(struct pci_dev *dev) { struct hotplug_params hpp; diff --git a/include/linux/pci_hotplug.h b/include/linux/pci_hotplug.h index 5f2e559af6b0..2706ee9a4327 100644 --- a/include/linux/pci_hotplug.h +++ b/include/linux/pci_hotplug.h @@ -187,6 +187,4 @@ static inline int pci_get_hp_params(struct pci_dev *dev, return -ENODEV; } #endif - -void pci_configure_slot(struct pci_dev *dev); #endif -- cgit v1.2.3 From 46e5da40aec256155cfedee96dd21a75da941f2c Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 12 Sep 2014 20:04:52 -0700 Subject: net: qdisc: use rcu prefix and silence sparse warnings Add __rcu notation to qdisc handling; by doing this we can make smatch output more legible. And anyway, some of the cases should be using rcu_dereference(); see qdisc_all_tx_empty(), qdisc_tx_changing(), and so on. Also, the *wake_queue() API is commonly called from driver timer routines without the rcu lock or rtnl lock. So I added rcu_read_lock() blocks around netif_wake_subqueue and netif_tx_wake_queue. Signed-off-by: John Fastabend Acked-by: Eric Dumazet Signed-off-by: David S.
Miller --- include/linux/netdevice.h | 29 ++++----------------------- include/net/sch_generic.h | 21 +++++++++++++------ net/core/dev.c | 51 +++++++++++++++++++++++++++++++++++++++++++++-- net/sched/sch_generic.c | 4 ++-- net/sched/sch_mqprio.c | 6 ++++-- net/sched/sch_teql.c | 13 +++++++----- 6 files changed, 82 insertions(+), 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ba72f6baae1a..ae721f53739e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -543,7 +543,7 @@ struct netdev_queue { * read mostly part */ struct net_device *dev; - struct Qdisc *qdisc; + struct Qdisc __rcu *qdisc; struct Qdisc *qdisc_sleeping; #ifdef CONFIG_SYSFS struct kobject kobj; @@ -2356,12 +2356,7 @@ static inline void input_queue_tail_incr_save(struct softnet_data *sd, DECLARE_PER_CPU_ALIGNED(struct softnet_data, softnet_data); void __netif_schedule(struct Qdisc *q); - -static inline void netif_schedule_queue(struct netdev_queue *txq) -{ - if (!(txq->state & QUEUE_STATE_ANY_XOFF)) - __netif_schedule(txq->qdisc); -} +void netif_schedule_queue(struct netdev_queue *txq); static inline void netif_tx_schedule_all(struct net_device *dev) { @@ -2397,11 +2392,7 @@ static inline void netif_tx_start_all_queues(struct net_device *dev) } } -static inline void netif_tx_wake_queue(struct netdev_queue *dev_queue) -{ - if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state)) - __netif_schedule(dev_queue->qdisc); -} +void netif_tx_wake_queue(struct netdev_queue *dev_queue); /** * netif_wake_queue - restart transmit @@ -2673,19 +2664,7 @@ static inline bool netif_subqueue_stopped(const struct net_device *dev, return __netif_subqueue_stopped(dev, skb_get_queue_mapping(skb)); } -/** - * netif_wake_subqueue - allow sending packets on subqueue - * @dev: network device - * @queue_index: sub queue index - * - * Resume individual transmit queue of a device with multiple transmit queues. 
- */ -static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) -{ - struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); - if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state)) - __netif_schedule(txq->qdisc); -} +void netif_wake_subqueue(struct net_device *dev, u16 queue_index); #ifdef CONFIG_XPS int netif_set_xps_queue(struct net_device *dev, const struct cpumask *mask, diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index a3cfb8ebeb53..56838ab29b42 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -259,7 +259,9 @@ static inline spinlock_t *qdisc_lock(struct Qdisc *qdisc) static inline struct Qdisc *qdisc_root(const struct Qdisc *qdisc) { - return qdisc->dev_queue->qdisc; + struct Qdisc *q = rcu_dereference_rtnl(qdisc->dev_queue->qdisc); + + return q; } static inline struct Qdisc *qdisc_root_sleeping(const struct Qdisc *qdisc) @@ -384,7 +386,7 @@ static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i) struct Qdisc *qdisc; for (; i < dev->num_tx_queues; i++) { - qdisc = netdev_get_tx_queue(dev, i)->qdisc; + qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc); if (qdisc) { spin_lock_bh(qdisc_lock(qdisc)); qdisc_reset(qdisc); @@ -402,13 +404,18 @@ static inline void qdisc_reset_all_tx(struct net_device *dev) static inline bool qdisc_all_tx_empty(const struct net_device *dev) { unsigned int i; + + rcu_read_lock(); for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - const struct Qdisc *q = txq->qdisc; + const struct Qdisc *q = rcu_dereference(txq->qdisc); - if (q->q.qlen) + if (q->q.qlen) { + rcu_read_unlock(); return false; + } } + rcu_read_unlock(); return true; } @@ -416,9 +423,10 @@ static inline bool qdisc_all_tx_empty(const struct net_device *dev) static inline bool qdisc_tx_changing(const struct net_device *dev) { unsigned int i; + for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - if (txq->qdisc != txq->qdisc_sleeping) + if (rcu_access_pointer(txq->qdisc) != txq->qdisc_sleeping) return true; } return false; @@ -428,9 +436,10 @@ static inline bool qdisc_tx_changing(const struct net_device *dev) static inline bool qdisc_tx_is_noop(const struct net_device *dev) { unsigned int i; + for (i = 0; i < dev->num_tx_queues; i++) { struct netdev_queue *txq = netdev_get_tx_queue(dev, i); - if (txq->qdisc != &noop_qdisc) + if (rcu_access_pointer(txq->qdisc) != &noop_qdisc) return false; } return true; diff --git a/net/core/dev.c b/net/core/dev.c index 3c6a967e5830..b3d6dbc0c696 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -2177,6 +2177,53 @@ static struct dev_kfree_skb_cb *get_kfree_skb_cb(const struct sk_buff *skb) return (struct dev_kfree_skb_cb *)skb->cb; } +void netif_schedule_queue(struct netdev_queue *txq) +{ + rcu_read_lock(); + if (!(txq->state & QUEUE_STATE_ANY_XOFF)) { + struct Qdisc *q = rcu_dereference(txq->qdisc); + + __netif_schedule(q); + } + rcu_read_unlock(); +} +EXPORT_SYMBOL(netif_schedule_queue); + +/** + * netif_wake_subqueue - allow sending packets on subqueue + * @dev: network device + * @queue_index: sub queue index + * + * Resume individual transmit queue of a device with multiple transmit queues. 
+ */ +void netif_wake_subqueue(struct net_device *dev, u16 queue_index) +{ + struct netdev_queue *txq = netdev_get_tx_queue(dev, queue_index); + + if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &txq->state)) { + struct Qdisc *q; + + rcu_read_lock(); + q = rcu_dereference(txq->qdisc); + __netif_schedule(q); + rcu_read_unlock(); + } +} +EXPORT_SYMBOL(netif_wake_subqueue); + +void netif_tx_wake_queue(struct netdev_queue *dev_queue) +{ + if (test_and_clear_bit(__QUEUE_STATE_DRV_XOFF, &dev_queue->state)) { + struct Qdisc *q; + + rcu_read_lock(); + q = rcu_dereference(dev_queue->qdisc); + __netif_schedule(q); + rcu_read_unlock(); + } +} +EXPORT_SYMBOL(netif_tx_wake_queue); + void __dev_kfree_skb_irq(struct sk_buff *skb, enum skb_free_reason reason) { unsigned long flags; @@ -3432,7 +3479,7 @@ static int ing_filter(struct sk_buff *skb, struct netdev_queue *rxq) skb->tc_verd = SET_TC_RTTL(skb->tc_verd, ttl); skb->tc_verd = SET_TC_AT(skb->tc_verd, AT_INGRESS); - q = rxq->qdisc; + q = rcu_dereference(rxq->qdisc); if (q != &noop_qdisc) { spin_lock(qdisc_lock(q)); if (likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) @@ -3449,7 +3496,7 @@ static inline struct sk_buff *handle_ing(struct sk_buff *skb, { struct netdev_queue *rxq = rcu_dereference(skb->dev->ingress_queue); - if (!rxq || rxq->qdisc == &noop_qdisc) + if (!rxq || rcu_access_pointer(rxq->qdisc) == &noop_qdisc) goto out; if (*pt_prev) { diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 19696ebe9ebc..346ef85617d3 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -783,7 +783,7 @@ static void dev_deactivate_queue(struct net_device *dev, struct Qdisc *qdisc_default = _qdisc_default; struct Qdisc *qdisc; - qdisc = dev_queue->qdisc; + qdisc = rtnl_dereference(dev_queue->qdisc); if (qdisc) { spin_lock_bh(qdisc_lock(qdisc)); @@ -876,7 +876,7 @@ static void dev_init_scheduler_queue(struct net_device *dev, { struct Qdisc *qdisc = _qdisc; - dev_queue->qdisc = qdisc; + rcu_assign_pointer(dev_queue->qdisc, qdisc); dev_queue->qdisc_sleeping = qdisc; } diff --git a/net/sched/sch_mqprio.c b/net/sched/sch_mqprio.c index 6749e2f540d0..37e7d25d21f1 100644 --- a/net/sched/sch_mqprio.c +++ b/net/sched/sch_mqprio.c @@ -231,7 +231,7 @@ static int mqprio_dump(struct Qdisc *sch, struct sk_buff *skb) memset(&sch->qstats, 0, sizeof(sch->qstats)); for (i = 0; i < dev->num_tx_queues; i++) { - qdisc = netdev_get_tx_queue(dev, i)->qdisc; + qdisc = rtnl_dereference(netdev_get_tx_queue(dev, i)->qdisc); spin_lock_bh(qdisc_lock(qdisc)); sch->q.qlen += qdisc->q.qlen; sch->bstats.bytes += qdisc->bstats.bytes; @@ -340,7 +340,9 @@ static int mqprio_dump_class_stats(struct Qdisc *sch, unsigned long cl, spin_unlock_bh(d->lock); for (i = tc.offset; i < tc.offset + tc.count; i++) { - qdisc = netdev_get_tx_queue(dev, i)->qdisc; + struct netdev_queue *q = netdev_get_tx_queue(dev, i); + + qdisc = rtnl_dereference(q->qdisc); spin_lock_bh(qdisc_lock(qdisc)); bstats.bytes += qdisc->bstats.bytes; bstats.packets += qdisc->bstats.packets; diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index aaa8d03ed054..5cd291bd00e4 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -96,11 +96,14 @@ teql_dequeue(struct Qdisc *sch) struct teql_sched_data *dat = qdisc_priv(sch); struct netdev_queue *dat_queue; struct sk_buff *skb; + struct Qdisc *q; skb = __skb_dequeue(&dat->q); dat_queue = netdev_get_tx_queue(dat->m->dev, 0); + q = rcu_dereference_bh(dat_queue->qdisc); + if (skb == NULL) { - struct net_device *m = qdisc_dev(dat_queue->qdisc); 
+ struct net_device *m = qdisc_dev(q); if (m) { dat->m->slaves = sch; netif_wake_queue(m); } } else { qdisc_bstats_update(sch, skb); } - sch->q.qlen = dat->q.qlen + dat_queue->qdisc->q.qlen; + sch->q.qlen = dat->q.qlen + q->q.qlen; return skb; } @@ -157,9 +160,9 @@ teql_destroy(struct Qdisc *sch) txq = netdev_get_tx_queue(master->dev, 0); master->slaves = NULL; - root_lock = qdisc_root_sleeping_lock(txq->qdisc); + root_lock = qdisc_root_sleeping_lock(rtnl_dereference(txq->qdisc)); spin_lock_bh(root_lock); - qdisc_reset(txq->qdisc); + qdisc_reset(rtnl_dereference(txq->qdisc)); spin_unlock_bh(root_lock); } } @@ -266,7 +269,7 @@ static inline int teql_resolve(struct sk_buff *skb, struct dst_entry *dst = skb_dst(skb); int res; - if (txq->qdisc == &noop_qdisc) + if (rcu_access_pointer(txq->qdisc) == &noop_qdisc) return -ENODEV; if (!dev->header_ops || !dst) -- cgit v1.2.3 From 331b72922c5f58d48fd5500acadc91777cc31970 Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 12 Sep 2014 20:08:20 -0700 Subject: net: sched: RCU cls_tcindex Make cls_tcindex RCU safe. This patch adds a new RCU routine, rcu_dereference_bh_rtnl(), to check that the caller holds either the RCU read lock or RTNL. This is needed to handle the case where tcindex_lookup() is being called in both cases. Signed-off-by: John Fastabend Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 10 ++ net/sched/cls_tcindex.c | 248 ++++++++++++++++++++++++++++------------ 2 files changed, 164 insertions(+), 94 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 167bae7bdfa4..6cacbce1a06c 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -46,6 +46,16 @@ static inline int lockdep_rtnl_is_held(void) #define rcu_dereference_rtnl(p) \ rcu_dereference_check(p, lockdep_rtnl_is_held()) +/** + * rcu_dereference_bh_rtnl - rcu_dereference_bh with debug checking + * @p: The pointer to read, prior to dereference + * + * Do an rcu_dereference_bh(p), but check caller either holds rcu_read_lock_bh() + * or RTNL.
Note : Please prefer rtnl_dereference() or rcu_dereference_bh() + */ +#define rcu_dereference_bh_rtnl(p) \ + rcu_dereference_bh_check(p, lockdep_rtnl_is_held()) + /** * rtnl_dereference - fetch RCU pointer when updates are prevented by RTNL * @p: The pointer to read, prior to dereferencing diff --git a/net/sched/cls_tcindex.c b/net/sched/cls_tcindex.c index 3e9f76413b3b..a9f4279fbd69 100644 --- a/net/sched/cls_tcindex.c +++ b/net/sched/cls_tcindex.c @@ -32,19 +32,21 @@ struct tcindex_filter_result { struct tcindex_filter { u16 key; struct tcindex_filter_result result; - struct tcindex_filter *next; + struct tcindex_filter __rcu *next; + struct rcu_head rcu; }; struct tcindex_data { struct tcindex_filter_result *perfect; /* perfect hash; NULL if none */ - struct tcindex_filter **h; /* imperfect hash; only used if !perfect; - NULL if unused */ + struct tcindex_filter __rcu **h; /* imperfect hash; */ + struct tcf_proto *tp; u16 mask; /* AND key with mask */ - int shift; /* shift ANDed key to the right */ - int hash; /* hash table size; 0 if undefined */ - int alloc_hash; /* allocated size */ - int fall_through; /* 0: only classify if explicit match */ + u32 shift; /* shift ANDed key to the right */ + u32 hash; /* hash table size; 0 if undefined */ + u32 alloc_hash; /* allocated size */ + u32 fall_through; /* 0: only classify if explicit match */ + struct rcu_head rcu; }; static inline int @@ -56,13 +58,18 @@ tcindex_filter_is_set(struct tcindex_filter_result *r) static struct tcindex_filter_result * tcindex_lookup(struct tcindex_data *p, u16 key) { - struct tcindex_filter *f; + if (p->perfect) { + struct tcindex_filter_result *f = p->perfect + key; + + return tcindex_filter_is_set(f) ? f : NULL; + } else if (p->h) { + struct tcindex_filter __rcu **fp; + struct tcindex_filter *f; - if (p->perfect) - return tcindex_filter_is_set(p->perfect + key) ? 
- p->perfect + key : NULL; - else if (p->h) { - for (f = p->h[key % p->hash]; f; f = f->next) + fp = &p->h[key % p->hash]; + for (f = rcu_dereference_bh_rtnl(*fp); + f; + fp = &f->next, f = rcu_dereference_bh_rtnl(*fp)) if (f->key == key) return &f->result; } @@ -74,7 +81,7 @@ tcindex_lookup(struct tcindex_data *p, u16 key) static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp, struct tcf_result *res) { - struct tcindex_data *p = tp->root; + struct tcindex_data *p = rcu_dereference(tp->root); struct tcindex_filter_result *f; int key = (skb->tc_index & p->mask) >> p->shift; @@ -99,7 +106,7 @@ static int tcindex_classify(struct sk_buff *skb, const struct tcf_proto *tp, static unsigned long tcindex_get(struct tcf_proto *tp, u32 handle) { - struct tcindex_data *p = tp->root; + struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r; pr_debug("tcindex_get(tp %p,handle 0x%08x)\n", tp, handle); @@ -129,49 +136,59 @@ static int tcindex_init(struct tcf_proto *tp) p->hash = DEFAULT_HASH_SIZE; p->fall_through = 1; - tp->root = p; + rcu_assign_pointer(tp->root, p); return 0; } - static int -__tcindex_delete(struct tcf_proto *tp, unsigned long arg, int lock) +tcindex_delete(struct tcf_proto *tp, unsigned long arg) { - struct tcindex_data *p = tp->root; + struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r = (struct tcindex_filter_result *) arg; + struct tcindex_filter __rcu **walk; struct tcindex_filter *f = NULL; - pr_debug("tcindex_delete(tp %p,arg 0x%lx),p %p,f %p\n", tp, arg, p, f); + pr_debug("tcindex_delete(tp %p,arg 0x%lx),p %p\n", tp, arg, p); if (p->perfect) { if (!r->res.class) return -ENOENT; } else { int i; - struct tcindex_filter **walk = NULL; - for (i = 0; i < p->hash; i++) - for (walk = p->h+i; *walk; walk = &(*walk)->next) - if (&(*walk)->result == r) + for (i = 0; i < p->hash; i++) { + walk = p->h + i; + for (f = rtnl_dereference(*walk); f; + walk = &f->next, f = rtnl_dereference(*walk)) { + if (&f->result == r) goto found; + } + } return -ENOENT; found: - f = *walk; - if (lock) - tcf_tree_lock(tp); - *walk = f->next; - if (lock) - tcf_tree_unlock(tp); + rcu_assign_pointer(*walk, rtnl_dereference(f->next)); } tcf_unbind_filter(tp, &r->res); tcf_exts_destroy(tp, &r->exts); - kfree(f); + if (f) + kfree_rcu(f, rcu); return 0; } -static int tcindex_delete(struct tcf_proto *tp, unsigned long arg) +static int tcindex_destroy_element(struct tcf_proto *tp, + unsigned long arg, + struct tcf_walker *walker) { - return __tcindex_delete(tp, arg, 1); + return tcindex_delete(tp, arg); +} + +static void __tcindex_destroy(struct rcu_head *head) +{ + struct tcindex_data *p = container_of(head, struct tcindex_data, rcu); + + kfree(p->perfect); + kfree(p->h); + kfree(p); } static inline int @@ -194,6 +211,14 @@ static void tcindex_filter_result_init(struct tcindex_filter_result *r) tcf_exts_init(&r->exts, TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); } +static void __tcindex_partial_destroy(struct rcu_head *head) +{ + struct tcindex_data *p = container_of(head, struct tcindex_data, rcu); + + kfree(p->perfect); + kfree(p); +} + static int tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, u32 handle, struct tcindex_data *p, @@ -203,7 +228,7 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, int err, balloc = 0; struct tcindex_filter_result new_filter_result, *old_r = r; struct tcindex_filter_result cr; - struct tcindex_data cp; + struct tcindex_data *cp, *oldp; struct 
tcindex_filter *f = NULL; /* make gcc behave */ struct tcf_exts e; @@ -212,84 +237,118 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, if (err < 0) return err; - memcpy(&cp, p, sizeof(cp)); - tcindex_filter_result_init(&new_filter_result); + /* tcindex_data attributes must look atomic to classifier/lookup so + * allocate new tcindex data and RCU assign it onto root. Keeping + * perfect hash and hash pointers from old data. + */ + cp = kzalloc(sizeof(cp), GFP_KERNEL); + if (!cp) + return -ENOMEM; + + cp->mask = p->mask; + cp->shift = p->shift; + cp->hash = p->hash; + cp->alloc_hash = p->alloc_hash; + cp->fall_through = p->fall_through; + cp->tp = tp; + + if (p->perfect) { + cp->perfect = kmemdup(p->perfect, + sizeof(*r) * cp->hash, GFP_KERNEL); + if (!cp->perfect) + goto errout; + } + cp->h = p->h; + + memset(&new_filter_result, 0, sizeof(new_filter_result)); + tcf_exts_init(&new_filter_result.exts, + TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); tcindex_filter_result_init(&cr); if (old_r) cr.res = r->res; if (tb[TCA_TCINDEX_HASH]) - cp.hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); + cp->hash = nla_get_u32(tb[TCA_TCINDEX_HASH]); if (tb[TCA_TCINDEX_MASK]) - cp.mask = nla_get_u16(tb[TCA_TCINDEX_MASK]); + cp->mask = nla_get_u16(tb[TCA_TCINDEX_MASK]); if (tb[TCA_TCINDEX_SHIFT]) - cp.shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]); + cp->shift = nla_get_u32(tb[TCA_TCINDEX_SHIFT]); err = -EBUSY; + /* Hash already allocated, make sure that we still meet the * requirements for the allocated hash. */ - if (cp.perfect) { - if (!valid_perfect_hash(&cp) || - cp.hash > cp.alloc_hash) + if (cp->perfect) { + if (!valid_perfect_hash(cp) || + cp->hash > cp->alloc_hash) goto errout; - } else if (cp.h && cp.hash != cp.alloc_hash) + } else if (cp->h && cp->hash != cp->alloc_hash) { goto errout; + } err = -EINVAL; if (tb[TCA_TCINDEX_FALL_THROUGH]) - cp.fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]); + cp->fall_through = nla_get_u32(tb[TCA_TCINDEX_FALL_THROUGH]); - if (!cp.hash) { + if (!cp->hash) { /* Hash not specified, use perfect hash if the upper limit * of the hashing index is below the threshold. */ - if ((cp.mask >> cp.shift) < PERFECT_HASH_THRESHOLD) - cp.hash = (cp.mask >> cp.shift) + 1; + if ((cp->mask >> cp->shift) < PERFECT_HASH_THRESHOLD) + cp->hash = (cp->mask >> cp->shift) + 1; else - cp.hash = DEFAULT_HASH_SIZE; + cp->hash = DEFAULT_HASH_SIZE; } - if (!cp.perfect && !cp.h) - cp.alloc_hash = cp.hash; + if (!cp->perfect && cp->h) + cp->alloc_hash = cp->hash; /* Note: this could be as restrictive as if (handle & ~(mask >> shift)) * but then, we'd fail handles that may become valid after some future * mask change. While this is extremely unlikely to ever matter, * the check below is safer (and also more backwards-compatible). 
*/ - if (cp.perfect || valid_perfect_hash(&cp)) - if (handle >= cp.alloc_hash) + if (cp->perfect || valid_perfect_hash(cp)) + if (handle >= cp->alloc_hash) goto errout; err = -ENOMEM; - if (!cp.perfect && !cp.h) { - if (valid_perfect_hash(&cp)) { + if (!cp->perfect && !cp->h) { + if (valid_perfect_hash(cp)) { int i; - cp.perfect = kcalloc(cp.hash, sizeof(*r), GFP_KERNEL); - if (!cp.perfect) + cp->perfect = kcalloc(cp->hash, sizeof(*r), GFP_KERNEL); + if (!cp->perfect) goto errout; - for (i = 0; i < cp.hash; i++) - tcf_exts_init(&cp.perfect[i].exts, TCA_TCINDEX_ACT, + for (i = 0; i < cp->hash; i++) + tcf_exts_init(&cp->perfect[i].exts, + TCA_TCINDEX_ACT, TCA_TCINDEX_POLICE); balloc = 1; } else { - cp.h = kcalloc(cp.hash, sizeof(f), GFP_KERNEL); - if (!cp.h) + struct tcindex_filter __rcu **hash; + + hash = kcalloc(cp->hash, + sizeof(struct tcindex_filter *), + GFP_KERNEL); + + if (!hash) goto errout; + + cp->h = hash; balloc = 2; } } - if (cp.perfect) - r = cp.perfect + handle; + if (cp->perfect) + r = cp->perfect + handle; else - r = tcindex_lookup(&cp, handle) ? : &new_filter_result; + r = tcindex_lookup(cp, handle) ? : &new_filter_result; if (r == &new_filter_result) { f = kzalloc(sizeof(*f), GFP_KERNEL); @@ -307,33 +366,41 @@ tcindex_set_parms(struct net *net, struct tcf_proto *tp, unsigned long base, else tcf_exts_change(tp, &cr.exts, &e); - tcf_tree_lock(tp); if (old_r && old_r != r) tcindex_filter_result_init(old_r); - memcpy(p, &cp, sizeof(cp)); + oldp = p; r->res = cr.res; + rcu_assign_pointer(tp->root, cp); if (r == &new_filter_result) { - struct tcindex_filter **fp; + struct tcindex_filter *nfp; + struct tcindex_filter __rcu **fp; f->key = handle; f->result = new_filter_result; f->next = NULL; - for (fp = p->h+(handle % p->hash); *fp; fp = &(*fp)->next) - /* nothing */; - *fp = f; + + fp = p->h + (handle % p->hash); + for (nfp = rtnl_dereference(*fp); + nfp; + fp = &nfp->next, nfp = rtnl_dereference(*fp)) + ; /* nothing */ + + rcu_assign_pointer(*fp, f); } - tcf_tree_unlock(tp); + if (oldp) + call_rcu(&oldp->rcu, __tcindex_partial_destroy); return 0; errout_alloc: if (balloc == 1) - kfree(cp.perfect); + kfree(cp->perfect); else if (balloc == 2) - kfree(cp.h); + kfree(cp->h); errout: + kfree(cp); tcf_exts_destroy(tp, &e); return err; } @@ -345,7 +412,7 @@ tcindex_change(struct net *net, struct sk_buff *in_skb, { struct nlattr *opt = tca[TCA_OPTIONS]; struct nlattr *tb[TCA_TCINDEX_MAX + 1]; - struct tcindex_data *p = tp->root; + struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r = (struct tcindex_filter_result *) *arg; int err; @@ -364,10 +431,9 @@ tcindex_change(struct net *net, struct sk_buff *in_skb, tca[TCA_RATE], ovr); } - static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) { - struct tcindex_data *p = tp->root; + struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter *f, *next; int i; @@ -390,8 +456,8 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) if (!p->h) return; for (i = 0; i < p->hash; i++) { - for (f = p->h[i]; f; f = next) { - next = f->next; + for (f = rtnl_dereference(p->h[i]); f; f = next) { + next = rtnl_dereference(f->next); if (walker->count >= walker->skip) { if (walker->fn(tp, (unsigned long) &f->result, walker) < 0) { @@ -404,17 +470,9 @@ static void tcindex_walk(struct tcf_proto *tp, struct tcf_walker *walker) } } - -static int tcindex_destroy_element(struct tcf_proto *tp, - unsigned long arg, struct tcf_walker *walker) -{ - return __tcindex_delete(tp, 
arg, 0); -} - - static void tcindex_destroy(struct tcf_proto *tp) { - struct tcindex_data *p = tp->root; + struct tcindex_data *p = rtnl_dereference(tp->root); struct tcf_walker walker; pr_debug("tcindex_destroy(tp %p),p %p\n", tp, p); @@ -422,17 +480,16 @@ static void tcindex_destroy(struct tcf_proto *tp) walker.skip = 0; walker.fn = tcindex_destroy_element; tcindex_walk(tp, &walker); - kfree(p->perfect); - kfree(p->h); - kfree(p); - tp->root = NULL; + + RCU_INIT_POINTER(tp->root, NULL); + call_rcu(&p->rcu, __tcindex_destroy); } static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, struct sk_buff *skb, struct tcmsg *t) { - struct tcindex_data *p = tp->root; + struct tcindex_data *p = rtnl_dereference(tp->root); struct tcindex_filter_result *r = (struct tcindex_filter_result *) fh; unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; @@ -455,15 +512,18 @@ static int tcindex_dump(struct net *net, struct tcf_proto *tp, unsigned long fh, nla_nest_end(skb, nest); } else { if (p->perfect) { - t->tcm_handle = r-p->perfect; + t->tcm_handle = r - p->perfect; } else { struct tcindex_filter *f; + struct tcindex_filter __rcu **fp; int i; t->tcm_handle = 0; for (i = 0; !t->tcm_handle && i < p->hash; i++) { - for (f = p->h[i]; !t->tcm_handle && f; - f = f->next) { + fp = &p->h[i]; + for (f = rtnl_dereference(*fp); + !t->tcm_handle && f; + fp = &f->next, f = rtnl_dereference(*fp)) { if (&f->result == r) t->tcm_handle = f->key; } -- cgit v1.2.3 From a80e49e2cc3145af014a8ae44f575829cc236192 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 16 Aug 2014 17:47:18 +0200 Subject: nohz: Move nohz full init call to tick init This way we unbloat a bit main.c and more importantly we initialize nohz full after init_IRQ(). This dependency will be needed in further patches because nohz full needs irq work to raise its own IRQ. Information about the support for this ability on ARM64 is obtained on init_IRQ() which initialize the pointer to __smp_call_function. Since tick_init() is called right after init_IRQ(), this is a good place to call tick_nohz_init() and prepare for that dependency. Acked-by: Peter Zijlstra (Intel) Cc: Ingo Molnar Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Frederic Weisbecker --- include/linux/tick.h | 2 -- init/main.c | 1 - kernel/time/tick-common.c | 1 + kernel/time/tick-internal.h | 7 +++++++ 4 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index 9a82c7dc3fdd..595ee86f5e0d 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -181,14 +181,12 @@ static inline bool tick_nohz_full_cpu(int cpu) return cpumask_test_cpu(cpu, tick_nohz_full_mask); } -extern void tick_nohz_init(void); extern void __tick_nohz_full_check(void); extern void tick_nohz_full_kick(void); extern void tick_nohz_full_kick_cpu(int cpu); extern void tick_nohz_full_kick_all(void); extern void __tick_nohz_task_switch(struct task_struct *tsk); #else -static inline void tick_nohz_init(void) { } static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } static inline void __tick_nohz_full_check(void) { } diff --git a/init/main.c b/init/main.c index bb1aed928f21..8af2f1abfe38 100644 --- a/init/main.c +++ b/init/main.c @@ -577,7 +577,6 @@ asmlinkage __visible void __init start_kernel(void) local_irq_disable(); idr_init_cache(); rcu_init(); - tick_nohz_init(); context_tracking_init(); radix_tree_init(); /* init some links before init_ISA_irqs() */ diff --git a/kernel/time/tick-common.c b/kernel/time/tick-common.c index 0a0608edeb26..052b4b53c3d6 100644 --- a/kernel/time/tick-common.c +++ b/kernel/time/tick-common.c @@ -400,4 +400,5 @@ void tick_resume(void) void __init tick_init(void) { tick_broadcast_init(); + tick_nohz_init(); } diff --git a/kernel/time/tick-internal.h b/kernel/time/tick-internal.h index c19c1d84b6f3..366aeb4f2c66 100644 --- a/kernel/time/tick-internal.h +++ b/kernel/time/tick-internal.h @@ -99,6 +99,13 @@ static inline int tick_broadcast_oneshot_active(void) { return 0; } static inline bool tick_broadcast_oneshot_available(void) { return false; } #endif /* !TICK_ONESHOT */ +/* NO_HZ_FULL internal */ +#ifdef CONFIG_NO_HZ_FULL +extern void tick_nohz_init(void); +# else +static inline void tick_nohz_init(void) { } +#endif + /* * Broadcasting support */ -- cgit v1.2.3 From c5c38ef3d70377dc504a6a3f611a3ec814bc757b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 6 Sep 2014 15:43:02 +0200 Subject: irq_work: Introduce arch_irq_work_has_interrupt() The nohz full code needs irq work to trigger its own interrupt so that the subsystem can work even when the tick is stopped. Lets introduce arch_irq_work_has_interrupt() that archs can override to tell about their support for this ability. Signed-off-by: Peter Zijlstra Cc: Ingo Molnar Cc: Paul E. 
McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Frederic Weisbecker --- arch/alpha/include/asm/Kbuild | 1 + arch/arc/include/asm/Kbuild | 1 + arch/arm/include/asm/Kbuild | 1 + arch/arm64/include/asm/Kbuild | 3 ++- arch/avr32/include/asm/Kbuild | 1 + arch/blackfin/include/asm/Kbuild | 1 + arch/c6x/include/asm/Kbuild | 1 + arch/cris/include/asm/Kbuild | 1 + arch/frv/include/asm/Kbuild | 1 + arch/hexagon/include/asm/Kbuild | 1 + arch/ia64/include/asm/Kbuild | 1 + arch/m32r/include/asm/Kbuild | 1 + arch/m68k/include/asm/Kbuild | 1 + arch/metag/include/asm/Kbuild | 1 + arch/microblaze/include/asm/Kbuild | 1 + arch/mips/include/asm/Kbuild | 1 + arch/mn10300/include/asm/Kbuild | 1 + arch/openrisc/include/asm/Kbuild | 1 + arch/parisc/include/asm/Kbuild | 1 + arch/powerpc/include/asm/Kbuild | 1 + arch/s390/include/asm/Kbuild | 1 + arch/score/include/asm/Kbuild | 1 + arch/sh/include/asm/Kbuild | 1 + arch/sparc/include/asm/Kbuild | 1 + arch/tile/include/asm/Kbuild | 1 + arch/um/include/asm/Kbuild | 1 + arch/unicore32/include/asm/Kbuild | 1 + arch/x86/include/asm/Kbuild | 1 + arch/xtensa/include/asm/Kbuild | 1 + include/asm-generic/irq_work.h | 10 ++++++++++ include/linux/irq_work.h | 2 ++ 31 files changed, 42 insertions(+), 1 deletion(-) create mode 100644 include/asm-generic/irq_work.h (limited to 'include/linux') diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index e858aa0ad8af..a52cbf178c3a 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -4,6 +4,7 @@ generic-y += clkdev.h generic-y += cputime.h generic-y += exec.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += scatterlist.h diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index e76fd79f32b0..b8fffc1a2ac2 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -18,6 +18,7 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kmap_types.h generic-y += kvm_para.h generic-y += local.h diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index 70cd84eb7fda..202905e7ea0c 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -11,6 +11,7 @@ generic-y += hash.h generic-y += ioctl.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += local.h generic-y += local64.h diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 0b3fcf86e6ba..d617789b1ebd 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -9,8 +9,8 @@ generic-y += current.h generic-y += delay.h generic-y += div64.h generic-y += dma.h -generic-y += emergency-restart.h generic-y += early_ioremap.h +generic-y += emergency-restart.h generic-y += errno.h generic-y += ftrace.h generic-y += hash.h @@ -19,6 +19,7 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kvm_para.h diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild index 00a0f3ccd6eb..2a71b1cb9848 100644 --- a/arch/avr32/include/asm/Kbuild +++ b/arch/avr32/include/asm/Kbuild @@ -9,6 +9,7 @@ generic-y += exec.h generic-y += futex.h generic-y += hash.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h diff --git 
a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild index 0d93b9a79ca9..46ed6bb9c679 100644 --- a/arch/blackfin/include/asm/Kbuild +++ b/arch/blackfin/include/asm/Kbuild @@ -15,6 +15,7 @@ generic-y += hw_irq.h generic-y += ioctl.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kvm_para.h diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index 8dbdce8421b0..e77e0c1dbe75 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -22,6 +22,7 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += local.h diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index 31742dfadff9..802b94c4ca86 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -8,6 +8,7 @@ generic-y += clkdev.h generic-y += cputime.h generic-y += exec.h generic-y += hash.h +generic-y += irq_work.h generic-y += kvm_para.h generic-y += linkage.h generic-y += mcs_spinlock.h diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild index 5b73921b6e9d..3caf05cabfc5 100644 --- a/arch/frv/include/asm/Kbuild +++ b/arch/frv/include/asm/Kbuild @@ -3,6 +3,7 @@ generic-y += clkdev.h generic-y += cputime.h generic-y += exec.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += scatterlist.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index 0e69796b58c7..5f234a5a2320 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -23,6 +23,7 @@ generic-y += ioctls.h generic-y += iomap.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += local.h diff --git a/arch/ia64/include/asm/Kbuild b/arch/ia64/include/asm/Kbuild index e8317d2d6c8d..747320be9d0e 100644 --- a/arch/ia64/include/asm/Kbuild +++ b/arch/ia64/include/asm/Kbuild @@ -2,6 +2,7 @@ generic-y += clkdev.h generic-y += exec.h generic-y += hash.h +generic-y += irq_work.h generic-y += kvm_para.h generic-y += mcs_spinlock.h generic-y += preempt.h diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild index accc10a3dc78..e02448b0648b 100644 --- a/arch/m32r/include/asm/Kbuild +++ b/arch/m32r/include/asm/Kbuild @@ -3,6 +3,7 @@ generic-y += clkdev.h generic-y += cputime.h generic-y += exec.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += module.h generic-y += preempt.h diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index c67c94a2d672..dbaf9f3065e8 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -11,6 +11,7 @@ generic-y += hw_irq.h generic-y += ioctl.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kvm_para.h diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild index c29ead89a317..7b8111c8f937 100644 --- a/arch/metag/include/asm/Kbuild +++ b/arch/metag/include/asm/Kbuild @@ -19,6 +19,7 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kvm_para.h diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 
27a3acda6c19..448143b8cabd 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -5,6 +5,7 @@ generic-y += cputime.h generic-y += device.h generic-y += exec.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += scatterlist.h diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 335e5290ec75..57012ef1f51e 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -3,6 +3,7 @@ generic-y += cputime.h generic-y += current.h generic-y += emergency-restart.h generic-y += hash.h +generic-y += irq_work.h generic-y += local64.h generic-y += mcs_spinlock.h generic-y += mutex.h diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild index ecbd6676bd33..77eb1a68d13b 100644 --- a/arch/mn10300/include/asm/Kbuild +++ b/arch/mn10300/include/asm/Kbuild @@ -4,6 +4,7 @@ generic-y += clkdev.h generic-y += cputime.h generic-y += exec.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += scatterlist.h diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index 480af0d9c2f5..89b61d7dc790 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -31,6 +31,7 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kvm_para.h diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index ecf25e6678ad..ffb024b8423f 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -10,6 +10,7 @@ generic-y += exec.h generic-y += hash.h generic-y += hw_irq.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kvm_para.h generic-y += local.h diff --git a/arch/powerpc/include/asm/Kbuild b/arch/powerpc/include/asm/Kbuild index 7f23f162ce9c..31e8f59aff38 100644 --- a/arch/powerpc/include/asm/Kbuild +++ b/arch/powerpc/include/asm/Kbuild @@ -1,6 +1,7 @@ generic-y += clkdev.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += rwsem.h diff --git a/arch/s390/include/asm/Kbuild b/arch/s390/include/asm/Kbuild index b3fea0722ff1..773f86676588 100644 --- a/arch/s390/include/asm/Kbuild +++ b/arch/s390/include/asm/Kbuild @@ -2,6 +2,7 @@ generic-y += clkdev.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += scatterlist.h diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild index aad209199f7e..1909d2a5b82f 100644 --- a/arch/score/include/asm/Kbuild +++ b/arch/score/include/asm/Kbuild @@ -6,6 +6,7 @@ generic-y += barrier.h generic-y += clkdev.h generic-y += cputime.h generic-y += hash.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += preempt.h generic-y += scatterlist.h diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index c19e47dacb31..5a6c9acff0d2 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -12,6 +12,7 @@ generic-y += hash.h generic-y += ioctl.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kvm_para.h generic-y += local.h generic-y += local64.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index cdd1b447bb6c..f5f94ce1692c 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ 
-8,6 +8,7 @@ generic-y += emergency-restart.h generic-y += exec.h generic-y += hash.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += linkage.h generic-y += local.h generic-y += local64.h diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index 0aa5675e7025..e6462b8a6284 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -17,6 +17,7 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += local.h generic-y += local64.h generic-y += mcs_spinlock.h diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 7bd64aa2e94a..244b12c8cb39 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -14,6 +14,7 @@ generic-y += hash.h generic-y += hw_irq.h generic-y += io.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += mcs_spinlock.h generic-y += mutex.h diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index 1e5fb872a4aa..5a2bb53faa42 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -22,6 +22,7 @@ generic-y += ioctl.h generic-y += ioctls.h generic-y += ipcbuf.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += local.h diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 3bf000fab0ae..8fa909cc6553 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -7,5 +7,6 @@ genhdr-y += unistd_x32.h generic-y += clkdev.h generic-y += cputime.h generic-y += early_ioremap.h +generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += scatterlist.h diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index c3d20ba6eb86..105d38922c44 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -12,6 +12,7 @@ generic-y += hardirq.h generic-y += hash.h generic-y += ioctl.h generic-y += irq_regs.h +generic-y += irq_work.h generic-y += kdebug.h generic-y += kmap_types.h generic-y += kvm_para.h diff --git a/include/asm-generic/irq_work.h b/include/asm-generic/irq_work.h new file mode 100644 index 000000000000..a44f452c6590 --- /dev/null +++ b/include/asm-generic/irq_work.h @@ -0,0 +1,10 @@ +#ifndef __ASM_IRQ_WORK_H +#define __ASM_IRQ_WORK_H + +static inline bool arch_irq_work_has_interrupt(void) +{ + return false; +} + +#endif /* __ASM_IRQ_WORK_H */ + diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index bf9422c3aefe..6b47b2ede405 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -42,6 +42,8 @@ void irq_work_run(void); void irq_work_sync(struct irq_work *work); #ifdef CONFIG_IRQ_WORK +#include <asm/irq_work.h> + bool irq_work_needs_cpu(void); #else static inline bool irq_work_needs_cpu(void) { return false; } -- cgit v1.2.3 From 76a33061b9323b7fdb220ae5fa116c10833ec22e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 16 Aug 2014 18:37:19 +0200 Subject: irq_work: Force raised irq work to run on irq work interrupt The nohz full kick, which restarts the tick when any resource depends on it, can't be executed just anywhere given the operations it does on timers. If it is called from the scheduler or timers code, chances are that we run into a deadlock. This is why we run the nohz full kick from an irq work. That way we make sure that the kick runs on a virgin context.
However, while that is the case when irq work runs from its own dedicated self-IPI, things are different for the large group of archs that don't support self-triggered IPIs. To support them, irq works are also handled by the timer interrupt as a fallback. Now when irq works run from the timer interrupt, the context isn't blank. More precisely, they can run in the context of the hrtimer that drives the tick. But the nohz kick cancels and restarts this hrtimer, and cancelling an hrtimer from within its own handler isn't allowed. This is why we end up in an endless loop:

Kernel panic - not syncing: Watchdog detected hard LOCKUP on cpu 2
CPU: 2 PID: 7538 Comm: kworker/u8:8 Not tainted 3.16.0+ #34
Workqueue: btrfs-endio-write normal_work_helper [btrfs]
ffff880244c06c88 000000001b486fe1 ffff880244c06bf0 ffffffff8a7f1e37
ffffffff8ac52a18 ffff880244c06c78 ffffffff8a7ef928 0000000000000010
ffff880244c06c88 ffff880244c06c20 000000001b486fe1 0000000000000000
Call Trace:
 [] dump_stack+0x4e/0x7a
 [] panic+0xd4/0x207
 [] watchdog_overflow_callback+0x118/0x120
 [] __perf_event_overflow+0xae/0x350
 [] ? perf_event_task_disable+0xa0/0xa0
 [] ? x86_perf_event_set_period+0xbf/0x150
 [] perf_event_overflow+0x14/0x20
 [] intel_pmu_handle_irq+0x206/0x410
 [] perf_event_nmi_handler+0x2b/0x50
 [] nmi_handle+0xd2/0x390
 [] ? nmi_handle+0x5/0x390
 [] ? match_held_lock+0x8/0x1b0
 [] default_do_nmi+0x72/0x1c0
 [] do_nmi+0xb8/0x100
 [] end_repeat_nmi+0x1e/0x2e
 [] ? match_held_lock+0x8/0x1b0
 [] ? match_held_lock+0x8/0x1b0
 [] ? match_held_lock+0x8/0x1b0
 [] lock_acquired+0xaf/0x450
 [] ? lock_hrtimer_base.isra.20+0x25/0x50
 [] _raw_spin_lock_irqsave+0x78/0x90
 [] ? lock_hrtimer_base.isra.20+0x25/0x50
 [] lock_hrtimer_base.isra.20+0x25/0x50
 [] hrtimer_try_to_cancel+0x33/0x1e0
 [] hrtimer_cancel+0x1a/0x30
 [] tick_nohz_restart+0x17/0x90
 [] __tick_nohz_full_check+0xc3/0x100
 [] nohz_full_kick_work_func+0xe/0x10
 [] irq_work_run_list+0x44/0x70
 [] irq_work_run+0x2a/0x50
 [] update_process_times+0x5b/0x70
 [] tick_sched_handle.isra.21+0x25/0x60
 [] tick_sched_timer+0x41/0x60
 [] __run_hrtimer+0x72/0x470
 [] ? tick_sched_do_timer+0xb0/0xb0
 [] hrtimer_interrupt+0x117/0x270
 [] local_apic_timer_interrupt+0x37/0x60
 [] smp_apic_timer_interrupt+0x3f/0x50
 [] apic_timer_interrupt+0x6f/0x80

To fix this we force non-lazy irq works to run on the irq work self-IPI when it is available. Whether the arch can trigger irq work self-IPIs is reported by arch_irq_work_has_interrupt().
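As an illustration, here is a minimal sketch of how an architecture opts in to the self-IPI path; "myarch" and myarch_send_self_ipi() are made-up placeholders, while arch_irq_work_has_interrupt() and arch_irq_work_raise() are the real hooks (the former defaults to false via the asm-generic header added in the previous patch):

/* arch/myarch/include/asm/irq_work.h (hypothetical) */
#ifndef __MYARCH_IRQ_WORK_H
#define __MYARCH_IRQ_WORK_H

static inline bool arch_irq_work_has_interrupt(void)
{
	/* This arch can raise a dedicated irq-work self-IPI. */
	return true;
}

#endif /* __MYARCH_IRQ_WORK_H */

/* arch/myarch/kernel/irq_work.c (hypothetical) */
void arch_irq_work_raise(void)
{
	/* Fire the self-IPI; its handler ends up calling irq_work_run(). */
	myarch_send_self_ipi(MYARCH_IRQ_WORK_VECTOR);
}

With arch_irq_work_has_interrupt() returning true, the irq_work_tick() introduced below leaves the raised list to the self-IPI, so the nohz kick never runs from the hrtimer it has to cancel.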
Reported-by: Catalin Iacob Reported-by: Dave Jones Acked-by: Peter Zijlstra (Intel) Cc: Ingo Molnar Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Frederic Weisbecker --- include/linux/irq_work.h | 1 + kernel/irq_work.c | 15 +++++++++++++-- kernel/time/timer.c | 2 +- 3 files changed, 15 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 6b47b2ede405..bf3fe719c7ce 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -39,6 +39,7 @@ bool irq_work_queue_on(struct irq_work *work, int cpu); #endif void irq_work_run(void); +void irq_work_tick(void); void irq_work_sync(struct irq_work *work); #ifdef CONFIG_IRQ_WORK diff --git a/kernel/irq_work.c b/kernel/irq_work.c index e6bcbe756663..385b85aded19 100644 --- a/kernel/irq_work.c +++ b/kernel/irq_work.c @@ -115,8 +115,10 @@ bool irq_work_needs_cpu(void) raised = &__get_cpu_var(raised_list); lazy = &__get_cpu_var(lazy_list); - if (llist_empty(raised) && llist_empty(lazy)) - return false; + + if (llist_empty(raised) || arch_irq_work_has_interrupt()) + if (llist_empty(lazy)) + return false; /* All work should have been flushed before going offline */ WARN_ON_ONCE(cpu_is_offline(smp_processor_id())); @@ -171,6 +173,15 @@ void irq_work_run(void) } EXPORT_SYMBOL_GPL(irq_work_run); +void irq_work_tick(void) +{ + struct llist_head *raised = &__get_cpu_var(raised_list); + + if (!llist_empty(raised) && !arch_irq_work_has_interrupt()) + irq_work_run_list(raised); + irq_work_run_list(&__get_cpu_var(lazy_list)); +} + /* * Synchronize against the irq_work @entry, ensures the entry is not * currently in use. diff --git a/kernel/time/timer.c b/kernel/time/timer.c index aca5dfe2fa3d..9bbb8344ed3b 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1385,7 +1385,7 @@ void update_process_times(int user_tick) rcu_check_callbacks(cpu, user_tick); #ifdef CONFIG_IRQ_WORK if (in_irq()) - irq_work_run(); + irq_work_tick(); #endif scheduler_tick(); run_posix_cpu_timers(p); -- cgit v1.2.3 From 6c555490e0ce885a9caf0a045db69382a3ccbc9c Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Thu, 11 Sep 2014 15:35:09 -0700 Subject: ipv6: drop useless rcu_read_lock() in anycast This code is now protected by the rtnl lock; the rcu read lock is useless here now.
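For readers following the conversion, a minimal sketch of the locking pattern it relies on; example_anycast_op() is a made-up name, the lookup helper is the real one:

#include <linux/netdevice.h>
#include <linux/rtnetlink.h>

static int example_anycast_op(struct net *net, int ifindex)
{
	struct net_device *dev;
	int err = 0;

	rtnl_lock();
	/* Safe without rcu_read_lock(): a device cannot be
	 * unregistered while RTNL is held.  The __ lookup takes
	 * no reference, so dev must not be used after unlock. */
	dev = __dev_get_by_index(net, ifindex);
	if (!dev)
		err = -ENODEV;
	/* else: operate on dev here, still under RTNL */
	rtnl_unlock();

	return err;
}

Signed-off-by: Cong Wang Signed-off-by: David S.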
Miller --- include/linux/netdevice.h | 4 ++-- net/core/dev.c | 14 ++++++++------ net/ipv6/anycast.c | 18 ++++++------------ 3 files changed, 16 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ae721f53739e..ee38b948d9a0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2083,8 +2083,8 @@ void __dev_remove_pack(struct packet_type *pt); void dev_add_offload(struct packet_offload *po); void dev_remove_offload(struct packet_offload *po); -struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short flags, - unsigned short mask); +struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags, + unsigned short mask); struct net_device *dev_get_by_name(struct net *net, const char *name); struct net_device *dev_get_by_name_rcu(struct net *net, const char *name); struct net_device *__dev_get_by_name(struct net *net, const char *name); diff --git a/net/core/dev.c b/net/core/dev.c index b3d6dbc0c696..e916ba8caccf 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -897,23 +897,25 @@ struct net_device *dev_getfirstbyhwtype(struct net *net, unsigned short type) EXPORT_SYMBOL(dev_getfirstbyhwtype); /** - * dev_get_by_flags_rcu - find any device with given flags + * __dev_get_by_flags - find any device with given flags * @net: the applicable net namespace * @if_flags: IFF_* values * @mask: bitmask of bits in if_flags to check * * Search for any interface with the given flags. Returns NULL if a device * is not found or a pointer to the device. Must be called inside - * rcu_read_lock(), and result refcount is unchanged. + * rtnl_lock(), and result refcount is unchanged. */ -struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags, - unsigned short mask) +struct net_device *__dev_get_by_flags(struct net *net, unsigned short if_flags, + unsigned short mask) { struct net_device *dev, *ret; + ASSERT_RTNL(); + ret = NULL; - for_each_netdev_rcu(net, dev) { + for_each_netdev(net, dev) { if (((dev->flags ^ if_flags) & mask) == 0) { ret = dev; break; @@ -921,7 +923,7 @@ struct net_device *dev_get_by_flags_rcu(struct net *net, unsigned short if_flags } return ret; } -EXPORT_SYMBOL(dev_get_by_flags_rcu); +EXPORT_SYMBOL(__dev_get_by_flags); /** * dev_valid_name - check if name is okay for network device diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index ff2de7d9d8e6..3b0429bc6b5a 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -78,7 +78,6 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) pac->acl_addr = *addr; rtnl_lock(); - rcu_read_lock(); if (ifindex == 0) { struct rt6_info *rt; @@ -91,11 +90,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) goto error; } else { /* router, no matching interface: just pick one */ - dev = dev_get_by_flags_rcu(net, IFF_UP, - IFF_UP | IFF_LOOPBACK); + dev = __dev_get_by_flags(net, IFF_UP, + IFF_UP | IFF_LOOPBACK); } } else - dev = dev_get_by_index_rcu(net, ifindex); + dev = __dev_get_by_index(net, ifindex); if (dev == NULL) { err = -ENODEV; @@ -137,7 +136,6 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) } error: - rcu_read_unlock(); rtnl_unlock(); if (pac) sock_kfree_s(sk, pac, sizeof(*pac)); @@ -174,11 +172,9 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) spin_unlock_bh(&ipv6_sk_ac_lock); rtnl_lock(); - rcu_read_lock(); - dev = dev_get_by_index_rcu(net, 
pac->acl_ifindex); + dev = __dev_get_by_index(net, pac->acl_ifindex); if (dev) ipv6_dev_ac_dec(dev, &pac->acl_addr); - rcu_read_unlock(); rtnl_unlock(); sock_kfree_s(sk, pac, sizeof(*pac)); @@ -203,12 +199,11 @@ void ipv6_sock_ac_close(struct sock *sk) prev_index = 0; rtnl_lock(); - rcu_read_lock(); while (pac) { struct ipv6_ac_socklist *next = pac->acl_next; if (pac->acl_ifindex != prev_index) { - dev = dev_get_by_index_rcu(net, pac->acl_ifindex); + dev = __dev_get_by_index(net, pac->acl_ifindex); prev_index = pac->acl_ifindex; } if (dev) @@ -216,7 +211,6 @@ void ipv6_sock_ac_close(struct sock *sk) sock_kfree_s(sk, pac, sizeof(*pac)); pac = next; } - rcu_read_unlock(); rtnl_unlock(); } @@ -341,7 +335,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr) return 0; } -/* called with rcu_read_lock() */ +/* called with rtnl_lock() */ static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr) { struct inet6_dev *idev = __in6_dev_get(dev); -- cgit v1.2.3 From 233577a22089facf5271ab5e845b2262047c971f Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 12 Sep 2014 14:04:43 +0200 Subject: net: filter: constify detection of pkt_type_offset Currently we have two pkt_type_offset() functions doing the same thing, spread across the architecture files. Remove those and replace them with a PKT_TYPE_OFFSET() macro helper that obtains the constant value with offsetof() from a zero-sized sk_buff member placed right in front of the bitfield. This new offset marker does not change the size of struct sk_buff.
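The zero-sized-member trick is worth a standalone sketch; struct demo and its fields are invented for illustration (the kernel's real marker is __pkt_type_offset in struct sk_buff):

#include <stddef.h>

/* offsetof() cannot be applied to a bitfield, so a zero-length
 * array (a GNU C extension, used throughout the kernel) placed
 * directly in front of the bitfield exports the bitfield's byte
 * offset as a compile-time constant. */
struct demo {
	unsigned int	a;
	unsigned char	__type_offset[0];	/* occupies no space */
	unsigned char	type:3,
			other:5;
};

#define DEMO_TYPE_OFFSET()	offsetof(struct demo, __type_offset)

/* sizeof(struct demo) is unchanged by the marker, and
 * DEMO_TYPE_OFFSET() is a constant the JITs can embed directly
 * into generated code, replacing the old run-time probe loops. */

Cc: Eric Dumazet Cc: Markos Chandras Cc: Martin Schwidefsky Cc: Daniel Borkmann Cc: Alexei Starovoitov Signed-off-by: Denis Kirjanov Signed-off-by: Hannes Frederic Sowa Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S.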
Miller --- arch/mips/net/bpf_jit.c | 27 +-------------------------- arch/s390/net/bpf_jit_comp.c | 35 +---------------------------------- include/linux/skbuff.h | 10 ++++++++++ net/core/filter.c | 31 ++----------------------------- 4 files changed, 14 insertions(+), 89 deletions(-) (limited to 'include/linux') diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index 0e97ccd29fe3..7edc08398c4a 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -765,27 +765,6 @@ static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset) return (u64)err << 32 | ntohl(ret); } -#ifdef __BIG_ENDIAN_BITFIELD -#define PKT_TYPE_MAX (7 << 5) -#else -#define PKT_TYPE_MAX 7 -#endif -static int pkt_type_offset(void) -{ - struct sk_buff skb_probe = { - .pkt_type = ~0, - }; - u8 *ct = (u8 *)&skb_probe; - unsigned int off; - - for (off = 0; off < sizeof(struct sk_buff); off++) { - if (ct[off] == PKT_TYPE_MAX) - return off; - } - pr_err_once("Please fix pkt_type_offset(), as pkt_type couldn't be found\n"); - return -1; -} - static int build_body(struct jit_ctx *ctx) { void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w}; @@ -1332,11 +1311,7 @@ jmp_cmp: case BPF_ANC | SKF_AD_PKTTYPE: ctx->flags |= SEEN_SKB; - off = pkt_type_offset(); - - if (off < 0) - return -1; - emit_load_byte(r_tmp, r_skb, off, ctx); + emit_load_byte(r_tmp, r_skb, PKT_TYPE_OFFSET(), ctx); /* Keep only the last 3 bits */ emit_andi(r_A, r_tmp, PKT_TYPE_MAX, ctx); #ifdef __BIG_ENDIAN_BITFIELD diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 555f5c7e83ab..c52ac77408ca 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -227,37 +227,6 @@ static void bpf_jit_epilogue(struct bpf_jit *jit) EMIT2(0x07fe); } -/* Helper to find the offset of pkt_type in sk_buff - * Make sure its still a 3bit field starting at the MSBs within a byte. - */ -#define PKT_TYPE_MAX 0xe0 -static int pkt_type_offset; - -static int __init bpf_pkt_type_offset_init(void) -{ - struct sk_buff skb_probe = { - .pkt_type = ~0, - }; - char *ct = (char *)&skb_probe; - int off; - - pkt_type_offset = -1; - for (off = 0; off < sizeof(struct sk_buff); off++) { - if (!ct[off]) - continue; - if (ct[off] == PKT_TYPE_MAX) - pkt_type_offset = off; - else { - /* Found non matching bit pattern, fix needed. 
*/ - WARN_ON_ONCE(1); - pkt_type_offset = -1; - return -1; - } - } - return 0; -} -device_initcall(bpf_pkt_type_offset_init); - /* * make sure we dont leak kernel information to user */ @@ -757,12 +726,10 @@ call_fn: /* lg %r1,(%r13) */ } break; case BPF_ANC | SKF_AD_PKTTYPE: - if (pkt_type_offset < 0) - goto out; /* lhi %r5,0 */ EMIT4(0xa7580000); /* ic %r5,(%r2) */ - EMIT4_DISP(0x43502000, pkt_type_offset); + EMIT4_DISP(0x43502000, PKT_TYPE_OFFSET()); /* srl %r5,5 */ EMIT4_DISP(0x88500000, 5); break; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 07c9fdd0c126..756e3d057e84 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -548,6 +548,16 @@ struct sk_buff { ip_summed:2, nohdr:1, nfctinfo:3; + +/* if you move pkt_type around you also must adapt those constants */ +#ifdef __BIG_ENDIAN_BITFIELD +#define PKT_TYPE_MAX (7 << 5) +#else +#define PKT_TYPE_MAX 7 +#endif +#define PKT_TYPE_OFFSET() offsetof(struct sk_buff, __pkt_type_offset) + + __u8 __pkt_type_offset[0]; __u8 pkt_type:3, fclone:2, ipvs_property:1, diff --git a/net/core/filter.c b/net/core/filter.c index dfc716ffa44b..601f28de7311 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -87,30 +87,6 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(sk_filter); -/* Helper to find the offset of pkt_type in sk_buff structure. We want - * to make sure its still a 3bit field starting at a byte boundary; - * taken from arch/x86/net/bpf_jit_comp.c. - */ -#ifdef __BIG_ENDIAN_BITFIELD -#define PKT_TYPE_MAX (7 << 5) -#else -#define PKT_TYPE_MAX 7 -#endif -static unsigned int pkt_type_offset(void) -{ - struct sk_buff skb_probe = { .pkt_type = ~0, }; - u8 *ct = (u8 *) &skb_probe; - unsigned int off; - - for (off = 0; off < sizeof(struct sk_buff); off++) { - if (ct[off] == PKT_TYPE_MAX) - return off; - } - - pr_err_once("Please fix %s, as pkt_type couldn't be found!\n", __func__); - return -1; -} - static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) { return skb_get_poff((struct sk_buff *)(unsigned long) ctx); @@ -190,11 +166,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, break; case SKF_AD_OFF + SKF_AD_PKTTYPE: - *insn = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX, - pkt_type_offset()); - if (insn->off < 0) - return false; - insn++; + *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX, + PKT_TYPE_OFFSET()); *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX); #ifdef __BIG_ENDIAN_BITFIELD insn++; -- cgit v1.2.3 From 3fc8867740b4a0bf56f372c6f5ddd14970962fb1 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Fri, 12 Sep 2014 23:12:46 -0700 Subject: netdevice: Support DSA tagging when DSA is built as a module This change corrects an error seen when DSA tagging is built as a module. Without this change it is not possible to get XDSA-tagged frames, because the test for tagging is compiled out by the #ifdef check.
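The one-line fix is easier to read with the config semantics spelled out; example_uses_dsa() is a made-up name, IS_ENABLED() is the real kconfig helper:

#include <linux/kconfig.h>
#include <linux/netdevice.h>

/* #ifdef CONFIG_NET_DSA is true only when DSA is built in (=y).
 * IS_ENABLED(CONFIG_NET_DSA) evaluates to 1 for both =y and =m
 * (it checks CONFIG_NET_DSA || CONFIG_NET_DSA_MODULE), so the
 * tagging test below survives a modular build. */
static inline bool example_uses_dsa(const struct net_device *dev)
{
#if IS_ENABLED(CONFIG_NET_DSA)
	return dev->dsa_ptr != NULL;
#else
	return false;
#endif
}

Signed-off-by: Alexander Duyck Acked-by: Florian Fainelli Signed-off-by: David S.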
Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ee38b948d9a0..f9e81d10a3b9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1789,7 +1789,7 @@ void dev_net_set(struct net_device *dev, struct net *net) static inline bool netdev_uses_dsa(struct net_device *dev) { -#ifdef CONFIG_NET_DSA +#if IS_ENABLED(CONFIG_NET_DSA) if (dev->dsa_ptr != NULL) return dsa_uses_tagged_protocol(dev->dsa_ptr); #endif -- cgit v1.2.3 From 1d46fea7d091f9dc2d4fd3fcb9f0117ca288f9a5 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Wed, 27 Aug 2014 00:42:56 +0200 Subject: drm/rcar-du: Use struct videomode in platform data In preparation for DT support where panel timings will be described by a DRM-agnostic video mode, replace the struct drm_mode_modeinfo instance in the panel platform data with a struct videomode. Signed-off-by: Laurent Pinchart --- arch/arm/mach-shmobile/board-koelsch-reference.c | 19 +++++++++---------- arch/arm/mach-shmobile/board-koelsch.c | 19 +++++++++---------- arch/arm/mach-shmobile/board-lager-reference.c | 19 +++++++++---------- arch/arm/mach-shmobile/board-lager.c | 19 +++++++++---------- arch/arm/mach-shmobile/board-marzen.c | 19 +++++++++---------- drivers/gpu/drm/rcar-du/Kconfig | 1 + drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c | 15 +++------------ include/linux/platform_data/rcar-du.h | 4 ++-- 8 files changed, 51 insertions(+), 64 deletions(-) (limited to 'include/linux') diff --git a/arch/arm/mach-shmobile/board-koelsch-reference.c b/arch/arm/mach-shmobile/board-koelsch-reference.c index 3ff88c138896..364e69bf85d4 100644 --- a/arch/arm/mach-shmobile/board-koelsch-reference.c +++ b/arch/arm/mach-shmobile/board-koelsch-reference.c @@ -41,16 +41,15 @@ static struct rcar_du_encoder_data koelsch_du_encoders[] = { .width_mm = 210, .height_mm = 158, .mode = { - .clock = 65000, - .hdisplay = 1024, - .hsync_start = 1048, - .hsync_end = 1184, - .htotal = 1344, - .vdisplay = 768, - .vsync_start = 771, - .vsync_end = 777, - .vtotal = 806, - .flags = 0, + .pixelclock = 65000000, + .hactive = 1024, + .hfront_porch = 20, + .hback_porch = 160, + .hsync_len = 136, + .vactive = 768, + .vfront_porch = 3, + .vback_porch = 29, + .vsync_len = 6, }, }, }, diff --git a/arch/arm/mach-shmobile/board-koelsch.c b/arch/arm/mach-shmobile/board-koelsch.c index b7d5bc7659cd..ad10ddb6a321 100644 --- a/arch/arm/mach-shmobile/board-koelsch.c +++ b/arch/arm/mach-shmobile/board-koelsch.c @@ -63,16 +63,15 @@ static struct rcar_du_encoder_data koelsch_du_encoders[] = { .width_mm = 210, .height_mm = 158, .mode = { - .clock = 65000, - .hdisplay = 1024, - .hsync_start = 1048, - .hsync_end = 1184, - .htotal = 1344, - .vdisplay = 768, - .vsync_start = 771, - .vsync_end = 777, - .vtotal = 806, - .flags = 0, + .pixelclock = 65000000, + .hactive = 1024, + .hfront_porch = 20, + .hback_porch = 160, + .hsync_len = 136, + .vactive = 768, + .vfront_porch = 3, + .vback_porch = 29, + .vsync_len = 6, }, }, }, diff --git a/arch/arm/mach-shmobile/board-lager-reference.c b/arch/arm/mach-shmobile/board-lager-reference.c index 41c808e56005..12a53a1c3d02 100644 --- a/arch/arm/mach-shmobile/board-lager-reference.c +++ b/arch/arm/mach-shmobile/board-lager-reference.c @@ -43,16 +43,15 @@ static struct rcar_du_encoder_data lager_du_encoders[] = { .width_mm = 210, .height_mm = 158, .mode = { - .clock = 65000, - .hdisplay = 1024, - .hsync_start = 1048, - .hsync_end = 1184, - .htotal = 
1344, - .vdisplay = 768, - .vsync_start = 771, - .vsync_end = 777, - .vtotal = 806, - .flags = 0, + .pixelclock = 65000000, + .hactive = 1024, + .hfront_porch = 20, + .hback_porch = 160, + .hsync_len = 136, + .vactive = 768, + .vfront_porch = 3, + .vback_porch = 29, + .vsync_len = 6, }, }, }, diff --git a/arch/arm/mach-shmobile/board-lager.c b/arch/arm/mach-shmobile/board-lager.c index e1d8215da0b0..80576c2ee668 100644 --- a/arch/arm/mach-shmobile/board-lager.c +++ b/arch/arm/mach-shmobile/board-lager.c @@ -99,16 +99,15 @@ static struct rcar_du_encoder_data lager_du_encoders[] = { .width_mm = 210, .height_mm = 158, .mode = { - .clock = 65000, - .hdisplay = 1024, - .hsync_start = 1048, - .hsync_end = 1184, - .htotal = 1344, - .vdisplay = 768, - .vsync_start = 771, - .vsync_end = 777, - .vtotal = 806, - .flags = 0, + .pixelclock = 65000000, + .hactive = 1024, + .hfront_porch = 20, + .hback_porch = 160, + .hsync_len = 136, + .vactive = 768, + .vfront_porch = 3, + .vback_porch = 29, + .vsync_len = 6, }, }, }, diff --git a/arch/arm/mach-shmobile/board-marzen.c b/arch/arm/mach-shmobile/board-marzen.c index e5cf4201e769..ce33d7825c49 100644 --- a/arch/arm/mach-shmobile/board-marzen.c +++ b/arch/arm/mach-shmobile/board-marzen.c @@ -192,16 +192,15 @@ static struct rcar_du_encoder_data du_encoders[] = { .width_mm = 210, .height_mm = 158, .mode = { - .clock = 65000, - .hdisplay = 1024, - .hsync_start = 1048, - .hsync_end = 1184, - .htotal = 1344, - .vdisplay = 768, - .vsync_start = 771, - .vsync_end = 777, - .vtotal = 806, - .flags = 0, + .pixelclock = 65000000, + .hactive = 1024, + .hfront_porch = 20, + .hback_porch = 160, + .hsync_len = 136, + .vactive = 768, + .vfront_porch = 3, + .vback_porch = 29, + .vsync_len = 6, }, }, }, diff --git a/drivers/gpu/drm/rcar-du/Kconfig b/drivers/gpu/drm/rcar-du/Kconfig index 2e3d7b5b0ad7..c96f6089f8bf 100644 --- a/drivers/gpu/drm/rcar-du/Kconfig +++ b/drivers/gpu/drm/rcar-du/Kconfig @@ -6,6 +6,7 @@ config DRM_RCAR_DU select DRM_KMS_CMA_HELPER select DRM_GEM_CMA_HELPER select DRM_KMS_FB_HELPER + select VIDEOMODE_HELPERS help Choose this option if you have an R-Car chipset. If M is selected the module will be called rcar-du-drm. diff --git a/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c b/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c index cfcf6e74ad0a..d29544121658 100644 --- a/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c +++ b/drivers/gpu/drm/rcar-du/rcar_du_lvdscon.c @@ -40,18 +40,9 @@ static int rcar_du_lvds_connector_get_modes(struct drm_connector *connector) return 0; mode->type = DRM_MODE_TYPE_PREFERRED | DRM_MODE_TYPE_DRIVER; - mode->clock = lvdscon->panel->mode.clock; - mode->hdisplay = lvdscon->panel->mode.hdisplay; - mode->hsync_start = lvdscon->panel->mode.hsync_start; - mode->hsync_end = lvdscon->panel->mode.hsync_end; - mode->htotal = lvdscon->panel->mode.htotal; - mode->vdisplay = lvdscon->panel->mode.vdisplay; - mode->vsync_start = lvdscon->panel->mode.vsync_start; - mode->vsync_end = lvdscon->panel->mode.vsync_end; - mode->vtotal = lvdscon->panel->mode.vtotal; - mode->flags = lvdscon->panel->mode.flags; - - drm_mode_set_name(mode); + + drm_display_mode_from_videomode(&lvdscon->panel->mode, mode); + drm_mode_probed_add(connector, mode); return 1; diff --git a/include/linux/platform_data/rcar-du.h b/include/linux/platform_data/rcar-du.h index 1a2e9901a22e..a5f045e1d8fe 100644 --- a/include/linux/platform_data/rcar-du.h +++ b/include/linux/platform_data/rcar-du.h @@ -14,7 +14,7 @@ #ifndef __RCAR_DU_H__ #define __RCAR_DU_H__ -#include <drm/drm_mode.h> +#include <video/videomode.h>
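For reference, a standalone sketch (editorial; example_panel_mode is an invented name, the values are the ones used by the board files above) of the same panel timings expressed as a struct videomode. The usual correspondence is hfront_porch = hsync_start - hdisplay, hsync_len = hsync_end - hsync_start, hback_porch = htotal - hsync_end (and likewise vertically), with pixelclock in Hz instead of the kHz used by the drm timings:

#include <video/videomode.h>

static const struct videomode example_panel_mode = {
	.pixelclock	= 65000000,	/* Hz; the old .clock was 65000 kHz */
	.hactive	= 1024,		/* old .hdisplay */
	.hfront_porch	= 20,
	.hsync_len	= 136,
	.hback_porch	= 160,
	.vactive	= 768,		/* old .vdisplay */
	.vfront_porch	= 3,
	.vsync_len	= 6,
	.vback_porch	= 29,
};

A connector driver can hand such a mode to drm_display_mode_from_videomode() to obtain a struct drm_display_mode, which is exactly what the rcar_du_lvds_connector_get_modes() change above does.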